# Print one sample literal of each basic type: str, int, float and bool.
for sample in ("World Bank", 5, 5.5, True):
    print(sample)
World Bank 5 5.5 True
Data taken from the World Bank repository.
import pandas as pd
# Load the indicator table; the four lines that precede the header row are skipped.
main_data = pd.read_csv(
    "/content/API_4_DS2_en_csv_v2_1741864.csv",
    skiprows=4,
)
# Preview the first rows.
main_data.head()
| Country Name | Country Code | Indicator Name | Indicator Code | 1960 | 1961 | 1962 | 1963 | 1964 | 1965 | 1966 | 1967 | 1968 | 1969 | 1970 | 1971 | 1972 | 1973 | 1974 | 1975 | 1976 | 1977 | 1978 | 1979 | 1980 | 1981 | 1982 | 1983 | 1984 | 1985 | 1986 | 1987 | 1988 | 1989 | 1990 | 1991 | 1992 | 1993 | 1994 | 1995 | 1996 | 1997 | 1998 | 1999 | 2000 | 2001 | 2002 | 2003 | 2004 | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | Unnamed: 65 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Aruba | ABW | Population ages 15-64 (% of total population) | SP.POP.1564.TO.ZS | 53.669919 | 54.056784 | 54.383281 | 54.710292 | 55.119933 | 55.631102 | 56.075544 | 56.703126 | 57.414449 | 58.086708 | 58.679715 | 59.55249 | 60.248997 | 60.890062 | 61.639241 | 62.566563 | 63.078929 | 63.850512 | 64.825383 | 65.810270 | 66.635051 | 67.185101 | 67.578450 | 67.804376 | 67.945127 | 68.154412 | 67.755890 | 67.567480 | 67.614074 | 67.749705 | 67.796747 | 68.523104 | 69.021763 | 69.298246 | 69.471969 | 69.667845 | 69.450721 | 69.326281 | 69.279421 | 69.261277 | 69.240421 | 69.323344 | 69.421636 | 69.554820 | 69.734750 | 69.935320 | 69.857389 | 69.690384 | 69.445924 | 69.159726 | 68.838092 | 68.885601 | 68.986934 | 69.108851 | 69.181105 | 69.159774 | 69.137615 | 68.946339 | 68.646606 | 68.321199 | NaN | NaN |
| 1 | Aruba | ABW | Population ages 0-14 (% of total population) | SP.POP.0014.TO.ZS | 43.847190 | 43.358346 | 42.925745 | 42.488756 | 41.950133 | 41.290098 | 40.689595 | 39.900095 | 39.011064 | 38.134727 | 37.316086 | 36.08681 | 35.015040 | 33.989011 | 32.869085 | 31.577559 | 30.878421 | 29.907564 | 28.734007 | 27.542514 | 26.514244 | 25.829247 | 25.314207 | 25.001206 | 24.785155 | 24.527973 | 24.862716 | 24.976954 | 24.853059 | 24.641172 | 24.547459 | 24.084677 | 23.804499 | 23.685590 | 23.598435 | 23.420148 | 23.562500 | 23.583106 | 23.490725 | 23.348127 | 23.179202 | 22.918685 | 22.622958 | 22.277539 | 21.882374 | 21.473343 | 21.223992 | 21.080398 | 21.005742 | 20.949189 | 20.873619 | 20.530937 | 20.092629 | 19.593055 | 19.111724 | 18.703098 | 18.257495 | 17.980183 | 17.802447 | 17.620445 | NaN | NaN |
| 2 | Aruba | ABW | Unemployment, total (% of total labor force) (... | SL.UEM.TOTL.ZS | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 3 | Aruba | ABW | Unemployment, male (% of male labor force) (mo... | SL.UEM.TOTL.MA.ZS | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 4 | Aruba | ABW | Unemployment, female (% of female labor force)... | SL.UEM.TOTL.FE.ZS | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
import pandas as pd
# Load the per-country metadata table.
country_data = pd.read_csv(
    "/content/Metadata_Country_API_4_DS2_en_csv_v2_1741864.csv",
)
# Preview the first rows.
country_data.head()
| Country Code | Region | IncomeGroup | SpecialNotes | TableName | Unnamed: 5 | |
|---|---|---|---|---|---|---|
| 0 | ABW | Latin America & Caribbean | High income | NaN | Aruba | NaN |
| 1 | AFG | South Asia | Low income | NaN | Afghanistan | NaN |
| 2 | AGO | Sub-Saharan Africa | Lower middle income | NaN | Angola | NaN |
| 3 | ALB | Europe & Central Asia | Upper middle income | NaN | Albania | NaN |
| 4 | AND | Europe & Central Asia | High income | NaN | Andorra | NaN |
# Show the column labels of the indicator table.
main_data.columns
Index(['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code',
'1960', '1961', '1962', '1963', '1964', '1965', '1966', '1967', '1968',
'1969', '1970', '1971', '1972', '1973', '1974', '1975', '1976', '1977',
'1978', '1979', '1980', '1981', '1982', '1983', '1984', '1985', '1986',
'1987', '1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995',
'1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004',
'2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013',
'2014', '2015', '2016', '2017', '2018', '2019', '2020', 'Unnamed: 65'],
dtype='object')
# List the distinct values found in the 'Indicator Name' column.
main_data['Indicator Name'].unique()
array(['Population ages 15-64 (% of total population)',
'Population ages 0-14 (% of total population)',
'Unemployment, total (% of total labor force) (modeled ILO estimate)',
'Unemployment, male (% of male labor force) (modeled ILO estimate)',
'Unemployment, female (% of female labor force) (modeled ILO estimate)',
'Labor force, total',
'Labor force, female (% of total labor force)',
'Probability of dying among youth ages 20-24 years (per 1,000)',
'Probability of dying among adolescents ages 15-19 years (per 1,000)',
'Probability of dying among adolescents ages 10-14 years (per 1,000)',
'Probability of dying among children ages 5-9 years (per 1,000)',
'Number of deaths ages 20-24 years',
'Number of deaths ages 15-19 years',
'Number of deaths ages 10-14 years',
'Number of deaths ages 5-9 years',
'Government expenditure on education, total (% of GDP)',
'Government expenditure on education, total (% of government expenditure)',
'Expenditure on tertiary education (% of government expenditure on education)',
'Government expenditure per student, tertiary (% of GDP per capita)',
'Expenditure on secondary education (% of government expenditure on education)',
'Government expenditure per student, secondary (% of GDP per capita)',
'Expenditure on primary education (% of government expenditure on education)',
'Government expenditure per student, primary (% of GDP per capita)',
'Current education expenditure, total (% of total expenditure in public institutions)',
'Current education expenditure, tertiary (% of total expenditure in tertiary public institutions)',
'Current education expenditure, secondary (% of total expenditure in secondary public institutions)',
'Current education expenditure, primary (% of total expenditure in primary public institutions)',
'Tertiary education, academic staff (% female)',
'School enrollment, tertiary, male (% gross)',
'School enrollment, tertiary, female (% gross)',
'School enrollment, tertiary (% gross)',
'Pupil-teacher ratio, tertiary',
'Educational attainment, at least completed short-cycle tertiary, population 25+, total (%) (cumulative)',
'Educational attainment, at least completed short-cycle tertiary, population 25+, male (%) (cumulative)',
'Educational attainment, at least completed short-cycle tertiary, population 25+, female (%) (cumulative)',
"Educational attainment, at least Master's or equivalent, population 25+, total (%) (cumulative)",
"Educational attainment, at least Master's or equivalent, population 25+, male (%) (cumulative)",
"Educational attainment, at least Master's or equivalent, population 25+, female (%) (cumulative)",
'Educational attainment, Doctoral or equivalent, population 25+, total (%) (cumulative)',
'Educational attainment, Doctoral or equivalent, population 25+, male (%) (cumulative)',
'Educational attainment, Doctoral or equivalent, population 25+, female (%) (cumulative)',
"Educational attainment, at least Bachelor's or equivalent, population 25+, total (%) (cumulative)",
"Educational attainment, at least Bachelor's or equivalent, population 25+, male (%) (cumulative)",
"Educational attainment, at least Bachelor's or equivalent, population 25+, female (%) (cumulative)",
'Adolescents out of school (% of lower secondary school age)',
'Adolescents out of school, male (% of male lower secondary school age)',
'Adolescents out of school, female (% of female lower secondary school age)',
'Secondary education, teachers (% female)',
'Secondary education, teachers, female',
'Secondary education, teachers',
'Trained teachers in secondary education (% of total teachers)',
'Trained teachers in upper secondary education (% of total teachers)',
'Trained teachers in upper secondary education, male (% of male teachers)',
'Trained teachers in upper secondary education, female (% of female teachers)',
'Trained teachers in secondary education, male (% of male teachers)',
'Trained teachers in lower secondary education (% of total teachers)',
'Trained teachers in lower secondary education, male (% of male teachers)',
'Trained teachers in lower secondary education, female (% of female teachers)',
'Trained teachers in secondary education, female (% of female teachers)',
'Progression to secondary school (%)',
'Progression to secondary school, male (%)',
'Progression to secondary school, female (%)',
'School enrollment, secondary, private (% of total secondary)',
'School enrollment, secondary, male (% net)',
'School enrollment, secondary, female (% net)',
'School enrollment, secondary (% net)',
'School enrollment, secondary, male (% gross)',
'School enrollment, secondary, female (% gross)',
'School enrollment, secondary (% gross)',
'Secondary education, vocational pupils (% female)',
'Secondary education, vocational pupils',
'Pupil-teacher ratio, upper secondary',
'Pupil-teacher ratio, secondary',
'Pupil-teacher ratio, lower secondary',
'Secondary education, general pupils (% female)',
'Secondary education, general pupils',
'Secondary education, pupils (% female)',
'Secondary education, pupils',
'Secondary education, duration (years)',
'Educational attainment, at least completed upper secondary, population 25+, total (%) (cumulative)',
'Educational attainment, at least completed upper secondary, population 25+, male (%) (cumulative)',
'Educational attainment, at least completed upper secondary, population 25+, female (%) (cumulative)',
'Educational attainment, at least completed post-secondary, population 25+, total (%) (cumulative)',
'Educational attainment, at least completed post-secondary, population 25+, male (%) (cumulative)',
'Educational attainment, at least completed post-secondary, population 25+, female (%) (cumulative)',
'Educational attainment, at least completed lower secondary, population 25+, total (%) (cumulative)',
'Educational attainment, at least completed lower secondary, population 25+, male (%) (cumulative)',
'Educational attainment, at least completed lower secondary, population 25+, female (%) (cumulative)',
'Lower secondary completion rate, total (% of relevant age group)',
'Lower secondary completion rate, male (% of relevant age group)',
'Lower secondary completion rate, female (% of relevant age group)',
'Lower secondary school starting age (years)',
'Children out of school (% of primary school age)',
'Children out of school, male (% of male primary school age)',
'Children out of school, primary, male',
'Children out of school, female (% of female primary school age)',
'Children out of school, primary, female',
'Children out of school, primary',
'Adjusted net enrollment rate, primary, male (% of primary school age children)',
'Adjusted net enrollment rate, primary, female (% of primary school age children)',
'Adjusted net enrollment rate, primary (% of primary school age children)',
'Primary education, teachers (% female)',
'Primary education, teachers',
'Trained teachers in primary education (% of total teachers)',
'Trained teachers in primary education, male (% of male teachers)',
'Trained teachers in primary education, female (% of female teachers)',
'Repeaters, primary, total (% of total enrollment)',
'Repeaters, primary, male (% of male enrollment)',
'Repeaters, primary, female (% of female enrollment)',
'Persistence to last grade of primary, total (% of cohort)',
'Persistence to last grade of primary, male (% of cohort)',
'Persistence to last grade of primary, female (% of cohort)',
'Persistence to grade 5, total (% of cohort)',
'Persistence to grade 5, male (% of cohort)',
'Persistence to grade 5, female (% of cohort)',
'School enrollment, primary, private (% of total primary)',
'Over-age students, primary (% of enrollment)',
'Over-age students, primary, male (% of male enrollment)',
'Over-age students, primary, female (% of female enrollment)',
'Net intake rate in grade 1 (% of official school-age population)',
'Net intake rate in grade 1, male (% of official school-age population)',
'Net intake rate in grade 1, female (% of official school-age population)',
'School enrollment, primary, male (% net)',
'School enrollment, primary, female (% net)',
'School enrollment, primary (% net)',
'Gross intake ratio in first grade of primary education, total (% of relevant age group)',
'Gross intake ratio in first grade of primary education, male (% of relevant age group)',
'Gross intake ratio in first grade of primary education, female (% of relevant age group)',
'School enrollment, primary, male (% gross)',
'School enrollment, primary, female (% gross)',
'School enrollment, primary (% gross)',
'Pupil-teacher ratio, primary',
'Primary education, pupils (% female)',
'Primary education, pupils', 'Primary education, duration (years)',
'Educational attainment, at least completed primary, population 25+ years, total (%) (cumulative)',
'Educational attainment, at least completed primary, population 25+ years, male (%) (cumulative)',
'Educational attainment, at least completed primary, population 25+ years, female (%) (cumulative)',
'Primary completion rate, total (% of relevant age group)',
'Primary completion rate, male (% of relevant age group)',
'Primary completion rate, female (% of relevant age group)',
'Primary school starting age (years)',
'Trained teachers in preprimary education (% of total teachers)',
'Trained teachers in preprimary education, male (% of male teachers)',
'Trained teachers in preprimary education, female (% of female teachers)',
'School enrollment, preprimary, male (% gross)',
'School enrollment, preprimary, female (% gross)',
'School enrollment, preprimary (% gross)',
'Pupil-teacher ratio, preprimary',
'Preprimary education, duration (years)',
'School enrollment, tertiary (gross), gender parity index (GPI)',
'School enrollment, secondary (gross), gender parity index (GPI)',
'School enrollment, primary and secondary (gross), gender parity index (GPI)',
'School enrollment, primary (gross), gender parity index (GPI)',
'Compulsory education, duration (years)',
'Literacy rate, adult total (% of people ages 15 and above)',
'Literacy rate, adult male (% of males ages 15 and above)',
'Literacy rate, adult female (% of females ages 15 and above)',
'Literacy rate, youth total (% of people ages 15-24)',
'Literacy rate, youth male (% of males ages 15-24)',
'Literacy rate, youth (ages 15-24), gender parity index (GPI)',
'Literacy rate, youth female (% of females ages 15-24)'],
dtype=object)
# Keep only the rows for the female unemployment indicator.
main_data_unem = main_data.loc[
    main_data['Indicator Name'].eq(
        'Unemployment, female (% of female labor force) (modeled ILO estimate)'
    )
]
main_data_unem.head()
| Country Name | Country Code | Indicator Name | Indicator Code | 1960 | 1961 | 1962 | 1963 | 1964 | 1965 | 1966 | 1967 | 1968 | 1969 | 1970 | 1971 | 1972 | 1973 | 1974 | 1975 | 1976 | 1977 | 1978 | 1979 | 1980 | 1981 | 1982 | 1983 | 1984 | 1985 | 1986 | 1987 | 1988 | 1989 | 1990 | 1991 | 1992 | 1993 | 1994 | 1995 | 1996 | 1997 | 1998 | 1999 | 2000 | 2001 | 2002 | 2003 | 2004 | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | Unnamed: 65 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 4 | Aruba | ABW | Unemployment, female (% of female labor force)... | SL.UEM.TOTL.FE.ZS | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 166 | Afghanistan | AFG | Unemployment, female (% of female labor force)... | SL.UEM.TOTL.FE.ZS | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 14.226 | 14.348000 | 14.391 | 14.515000 | 14.803000 | 14.505 | 14.699000 | 14.710 | 14.794 | 14.733000 | 14.702000 | 15.036 | 14.859 | 14.877000 | 14.910000 | 14.431000 | 14.724 | 14.154 | 14.911 | 14.815 | 14.781 | 14.820 | 14.680 | 14.505 | 14.427 | 14.314 | 14.090 | 13.906 | 14.004 | 14.062 | NaN |
| 328 | Angola | AGO | Unemployment, female (% of female labor force)... | SL.UEM.TOTL.FE.ZS | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.637 | 2.723000 | 2.695 | 2.882000 | 2.948000 | 2.994 | 2.876000 | 2.967 | 2.932 | 2.798000 | 2.811000 | 2.899 | 2.833 | 2.882000 | 2.852000 | 2.746000 | 2.723 | 2.710 | 2.845 | 10.922 | 7.718 | 7.788 | 7.772 | 7.719 | 7.681 | 7.563 | 7.467 | 7.327 | 6.942 | 6.631 | NaN |
| 490 | Albania | ALB | Unemployment, female (% of female labor force)... | SL.UEM.TOTL.FE.ZS | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 15.804 | 16.247999 | 16.711 | 16.749001 | 16.766001 | 16.739 | 16.434999 | 16.829 | 16.916 | 16.898001 | 16.992001 | 17.063 | 17.104 | 17.011999 | 16.919001 | 16.643999 | 16.399 | 13.752 | 15.734 | 15.881 | 13.762 | 11.467 | 13.345 | 15.153 | 17.098 | 14.573 | 12.563 | 11.229 | 11.604 | 12.190 | NaN |
| 652 | Andorra | AND | Unemployment, female (% of female labor force)... | SL.UEM.TOTL.FE.ZS | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
# Keep the country identifiers plus the yearly columns '1991' through '2020'.
# The year labels are strings, so they are generated rather than typed out.
main_data_unem = main_data_unem[
    ['Country Name', 'Country Code'] + [str(year) for year in range(1991, 2021)]
]
main_data_unem.head()
| Country Name | Country Code | 1991 | 1992 | 1993 | 1994 | 1995 | 1996 | 1997 | 1998 | 1999 | 2000 | 2001 | 2002 | 2003 | 2004 | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 4 | Aruba | ABW | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 166 | Afghanistan | AFG | 14.226 | 14.348000 | 14.391 | 14.515000 | 14.803000 | 14.505 | 14.699000 | 14.710 | 14.794 | 14.733000 | 14.702000 | 15.036 | 14.859 | 14.877000 | 14.910000 | 14.431000 | 14.724 | 14.154 | 14.911 | 14.815 | 14.781 | 14.820 | 14.680 | 14.505 | 14.427 | 14.314 | 14.090 | 13.906 | 14.004 | 14.062 |
| 328 | Angola | AGO | 2.637 | 2.723000 | 2.695 | 2.882000 | 2.948000 | 2.994 | 2.876000 | 2.967 | 2.932 | 2.798000 | 2.811000 | 2.899 | 2.833 | 2.882000 | 2.852000 | 2.746000 | 2.723 | 2.710 | 2.845 | 10.922 | 7.718 | 7.788 | 7.772 | 7.719 | 7.681 | 7.563 | 7.467 | 7.327 | 6.942 | 6.631 |
| 490 | Albania | ALB | 15.804 | 16.247999 | 16.711 | 16.749001 | 16.766001 | 16.739 | 16.434999 | 16.829 | 16.916 | 16.898001 | 16.992001 | 17.063 | 17.104 | 17.011999 | 16.919001 | 16.643999 | 16.399 | 13.752 | 15.734 | 15.881 | 13.762 | 11.467 | 13.345 | 15.153 | 17.098 | 14.573 | 12.563 | 11.229 | 11.604 | 12.190 |
| 652 | Andorra | AND | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
# Show the column labels of the country metadata table.
country_data.columns
Index(['Country Code', 'Region', 'IncomeGroup', 'SpecialNotes', 'TableName',
'Unnamed: 5'],
dtype='object')
# Keep only the country code plus the region and income-group classification.
# (A bare `country_data.columns` statement used to precede this line; its value
# was discarded because it was not the last expression, so it has been removed.)
country_data = country_data[['Country Code', 'Region', 'IncomeGroup']]
country_data.head()
| Country Code | Region | IncomeGroup | |
|---|---|---|---|
| 0 | ABW | Latin America & Caribbean | High income |
| 1 | AFG | South Asia | Low income |
| 2 | AGO | Sub-Saharan Africa | Lower middle income |
| 3 | ALB | Europe & Central Asia | Upper middle income |
| 4 | AND | Europe & Central Asia | High income |
# Attach Region and IncomeGroup to each unemployment row by matching on
# 'Country Code'; an inner join keeps only codes present in both tables.
merged_data = main_data_unem.merge(country_data, on='Country Code', how='inner')
merged_data.head()
| Country Name | Country Code | 1991 | 1992 | 1993 | 1994 | 1995 | 1996 | 1997 | 1998 | 1999 | 2000 | 2001 | 2002 | 2003 | 2004 | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | Region | IncomeGroup | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Aruba | ABW | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | Latin America & Caribbean | High income |
| 1 | Afghanistan | AFG | 14.226 | 14.348000 | 14.391 | 14.515000 | 14.803000 | 14.505 | 14.699000 | 14.710 | 14.794 | 14.733000 | 14.702000 | 15.036 | 14.859 | 14.877000 | 14.910000 | 14.431000 | 14.724 | 14.154 | 14.911 | 14.815 | 14.781 | 14.820 | 14.680 | 14.505 | 14.427 | 14.314 | 14.090 | 13.906 | 14.004 | 14.062 | South Asia | Low income |
| 2 | Angola | AGO | 2.637 | 2.723000 | 2.695 | 2.882000 | 2.948000 | 2.994 | 2.876000 | 2.967 | 2.932 | 2.798000 | 2.811000 | 2.899 | 2.833 | 2.882000 | 2.852000 | 2.746000 | 2.723 | 2.710 | 2.845 | 10.922 | 7.718 | 7.788 | 7.772 | 7.719 | 7.681 | 7.563 | 7.467 | 7.327 | 6.942 | 6.631 | Sub-Saharan Africa | Lower middle income |
| 3 | Albania | ALB | 15.804 | 16.247999 | 16.711 | 16.749001 | 16.766001 | 16.739 | 16.434999 | 16.829 | 16.916 | 16.898001 | 16.992001 | 17.063 | 17.104 | 17.011999 | 16.919001 | 16.643999 | 16.399 | 13.752 | 15.734 | 15.881 | 13.762 | 11.467 | 13.345 | 15.153 | 17.098 | 14.573 | 12.563 | 11.229 | 11.604 | 12.190 | Europe & Central Asia | Upper middle income |
| 4 | Andorra | AND | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | Europe & Central Asia | High income |
# Show the column labels of the merged table.
merged_data.columns
Index(['Country Name', 'Country Code', '1991', '1992', '1993', '1994', '1995',
'1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004',
'2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013',
'2014', '2015', '2016', '2017', '2018', '2019', '2020', 'Region',
'IncomeGroup'],
dtype='object')
# Re-select the columns in an explicit order: identifiers first, then the
# yearly columns '1991' through '2020', then the two classification columns.
merged_data = merged_data[
    [
        'Country Name',
        'Country Code',
        *(str(year) for year in range(1991, 2021)),
        'Region',
        'IncomeGroup',
    ]
]
merged_data.head()
| Country Name | Country Code | 1991 | 1992 | 1993 | 1994 | 1995 | 1996 | 1997 | 1998 | 1999 | 2000 | 2001 | 2002 | 2003 | 2004 | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | Region | IncomeGroup | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Aruba | ABW | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | Latin America & Caribbean | High income |
| 1 | Afghanistan | AFG | 14.226 | 14.348000 | 14.391 | 14.515000 | 14.803000 | 14.505 | 14.699000 | 14.710 | 14.794 | 14.733000 | 14.702000 | 15.036 | 14.859 | 14.877000 | 14.910000 | 14.431000 | 14.724 | 14.154 | 14.911 | 14.815 | 14.781 | 14.820 | 14.680 | 14.505 | 14.427 | 14.314 | 14.090 | 13.906 | 14.004 | 14.062 | South Asia | Low income |
| 2 | Angola | AGO | 2.637 | 2.723000 | 2.695 | 2.882000 | 2.948000 | 2.994 | 2.876000 | 2.967 | 2.932 | 2.798000 | 2.811000 | 2.899 | 2.833 | 2.882000 | 2.852000 | 2.746000 | 2.723 | 2.710 | 2.845 | 10.922 | 7.718 | 7.788 | 7.772 | 7.719 | 7.681 | 7.563 | 7.467 | 7.327 | 6.942 | 6.631 | Sub-Saharan Africa | Lower middle income |
| 3 | Albania | ALB | 15.804 | 16.247999 | 16.711 | 16.749001 | 16.766001 | 16.739 | 16.434999 | 16.829 | 16.916 | 16.898001 | 16.992001 | 17.063 | 17.104 | 17.011999 | 16.919001 | 16.643999 | 16.399 | 13.752 | 15.734 | 15.881 | 13.762 | 11.467 | 13.345 | 15.153 | 17.098 | 14.573 | 12.563 | 11.229 | 11.604 | 12.190 | Europe & Central Asia | Upper middle income |
| 4 | Andorra | AND | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | Europe & Central Asia | High income |
# Boolean mask of the merged table: True wherever a value is missing.
merged_data.isna()
| Country Name | Country Code | 1991 | 1992 | 1993 | 1994 | 1995 | 1996 | 1997 | 1998 | 1999 | 2000 | 2001 | 2002 | 2003 | 2004 | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | Region | IncomeGroup | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | False | False | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | False | False |
| 1 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 2 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 3 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 4 | False | False | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | False | False |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 258 | False | False | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | True | False | False |
| 259 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 260 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 261 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
| 262 | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False | False |
263 rows × 34 columns
# Count the missing values in each column and show the first few counts.
merged_data.isna().sum().head()
Country Name 0 Country Code 0 1991 30 1992 30 1993 30 dtype: int64
The shape of merged_data
# Report (rows, columns) of the merged table. The shape is read directly:
# an isna() mask always has the same shape as its frame, so building one
# just to measure it is wasted work.
merged_data.shape
(263, 34)
# Drop every row that has a missing value in any column.
merged_data_clean = merged_data.dropna(axis=0, how='any')
merged_data_clean.head()
| Country Name | Country Code | 1991 | 1992 | 1993 | 1994 | 1995 | 1996 | 1997 | 1998 | 1999 | 2000 | 2001 | 2002 | 2003 | 2004 | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | Region | IncomeGroup | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | Afghanistan | AFG | 14.226 | 14.348000 | 14.391 | 14.515000 | 14.803000 | 14.505000 | 14.699000 | 14.710 | 14.794 | 14.733000 | 14.702000 | 15.036 | 14.859 | 14.877000 | 14.910000 | 14.431000 | 14.724 | 14.154 | 14.911 | 14.815 | 14.781 | 14.820 | 14.680 | 14.505 | 14.427 | 14.314 | 14.090 | 13.906 | 14.004 | 14.062 | South Asia | Low income |
| 2 | Angola | AGO | 2.637 | 2.723000 | 2.695 | 2.882000 | 2.948000 | 2.994000 | 2.876000 | 2.967 | 2.932 | 2.798000 | 2.811000 | 2.899 | 2.833 | 2.882000 | 2.852000 | 2.746000 | 2.723 | 2.710 | 2.845 | 10.922 | 7.718 | 7.788 | 7.772 | 7.719 | 7.681 | 7.563 | 7.467 | 7.327 | 6.942 | 6.631 | Sub-Saharan Africa | Lower middle income |
| 3 | Albania | ALB | 15.804 | 16.247999 | 16.711 | 16.749001 | 16.766001 | 16.739000 | 16.434999 | 16.829 | 16.916 | 16.898001 | 16.992001 | 17.063 | 17.104 | 17.011999 | 16.919001 | 16.643999 | 16.399 | 13.752 | 15.734 | 15.881 | 13.762 | 11.467 | 13.345 | 15.153 | 17.098 | 14.573 | 12.563 | 11.229 | 11.604 | 12.190 | Europe & Central Asia | Upper middle income |
| 6 | United Arab Emirates | ARE | 2.431 | 2.115000 | 2.259 | 2.259000 | 2.359000 | 2.441000 | 2.501000 | 2.513 | 2.529 | 2.718000 | 3.355000 | 3.978 | 5.107 | 6.390000 | 7.221000 | 6.682000 | 5.829 | 5.419 | 5.843 | 5.883 | 5.983 | 5.956 | 5.851 | 5.214 | 4.703 | 4.200 | 7.136 | 6.187 | 6.046 | 6.042 | Middle East & North Africa | High income |
| 7 | Argentina | ARG | 5.747 | 6.711000 | 12.558 | 13.927000 | 22.195999 | 19.190001 | 17.631001 | 14.029 | 15.147 | 16.344999 | 17.191999 | 18.830 | 17.549 | 15.789000 | 13.561000 | 12.392000 | 10.544 | 9.720 | 9.855 | 9.196 | 8.496 | 8.811 | 8.484 | 8.383 | 8.851 | 9.118 | 9.464 | 10.538 | 10.922 | 11.487 | Latin America & Caribbean | Upper middle income |
This is the new size once the dataset is cleaned up
merged_data_clean.shape
(187, 34)
# Mean % female unemployment per income group for every year column.
# numeric_only=True restricts the aggregation to the numeric (year) columns;
# without it pandas >= 2.0 raises TypeError on the text columns
# (Country Name, Country Code, Region) instead of silently dropping them.
grouped_data_income = merged_data_clean.groupby(['IncomeGroup']).mean(numeric_only=True)
grouped_data_income
| 1991 | 1992 | 1993 | 1994 | 1995 | 1996 | 1997 | 1998 | 1999 | 2000 | 2001 | 2002 | 2003 | 2004 | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| IncomeGroup | ||||||||||||||||||||||||||||||
| High income | 7.736467 | 8.250783 | 9.243850 | 9.437617 | 9.237233 | 9.212950 | 8.843950 | 8.693183 | 8.703783 | 8.479150 | 8.252700 | 8.419200 | 8.672367 | 8.563100 | 8.345683 | 7.703300 | 7.011633 | 6.799433 | 8.279067 | 8.769983 | 8.888267 | 9.132533 | 9.244683 | 8.827083 | 8.370100 | 7.900950 | 7.301283 | 6.753733 | 6.569350 | 6.594800 |
| Low income | 5.191034 | 5.122586 | 5.241103 | 5.521621 | 5.722138 | 5.862035 | 5.939793 | 6.142310 | 6.236034 | 6.476931 | 6.796276 | 6.910207 | 6.998379 | 7.046828 | 7.092172 | 7.077345 | 7.154931 | 7.056069 | 7.421069 | 7.645621 | 7.609897 | 7.646276 | 7.719931 | 7.709862 | 7.734690 | 7.701448 | 7.599138 | 7.491828 | 7.458897 | 7.440862 |
| Lower middle income | 7.770750 | 7.832417 | 8.115313 | 8.255917 | 8.423167 | 8.522042 | 8.436583 | 8.552500 | 8.596146 | 8.522167 | 8.632187 | 8.800479 | 8.815729 | 8.629063 | 8.552938 | 8.074896 | 7.795771 | 7.745271 | 8.212021 | 8.366792 | 8.326208 | 8.061479 | 8.041271 | 7.941438 | 8.267479 | 8.405146 | 8.481042 | 8.283375 | 8.333917 | 8.284333 |
| Upper middle income | 11.855680 | 11.943880 | 12.369380 | 12.909100 | 13.475140 | 13.965980 | 14.108920 | 14.097900 | 14.203700 | 13.907960 | 13.776640 | 14.130820 | 13.996420 | 13.734020 | 13.419940 | 12.613340 | 11.910920 | 11.348980 | 12.007120 | 12.239020 | 12.157240 | 12.124720 | 12.233620 | 12.294740 | 12.246720 | 12.158600 | 11.795040 | 11.403020 | 11.610500 | 11.690940 |
# Summary statistics (count, mean, std, min, quartiles, max) of every year
# column, computed separately for each income group.
grouped_data_income_describe = merged_data_clean.groupby(['IncomeGroup']).describe()
grouped_data_income_describe
| 1991 | 1992 | 1993 | 1994 | 1995 | ... | 2016 | 2017 | 2018 | 2019 | 2020 | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | ... | count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | |
| IncomeGroup | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| High income | 60.0 | 7.736467 | 5.462825 | 0.950 | 3.31900 | 6.4480 | 10.781250 | 23.796000 | 60.0 | 8.250783 | 5.555074 | 0.941 | 3.86550 | 7.2745 | 10.7155 | 25.673000 | 60.0 | 9.243850 | 5.694475 | 1.941 | 5.15075 | 8.0525 | 12.882000 | 28.754999 | 60.0 | 9.437617 | 5.917561 | 1.701 | 5.13950 | 8.0965 | 12.88925 | 31.612000 | 60.0 | 9.237233 | 5.574981 | 1.685 | 4.96625 | 8.2605 | 12.103750 | 30.554001 | ... | 60.0 | 7.900950 | 5.015357 | 0.717 | 4.76175 | 6.548 | 9.86525 | 28.143000 | 60.0 | 7.301283 | 4.628991 | 0.639 | 4.31700 | 5.7610 | 9.37525 | 26.115000 | 60.0 | 6.753733 | 4.506657 | 0.479 | 3.91675 | 5.4690 | 8.83825 | 24.290001 | 60.0 | 6.569350 | 4.338601 | 0.432 | 3.60575 | 5.2840 | 8.182500 | 22.114000 | 60.0 | 6.594800 | 4.264579 | 0.385 | 3.75375 | 5.3650 | 8.22600 | 22.454000 |
| Low income | 29.0 | 5.191034 | 5.147265 | 0.149 | 2.12800 | 3.1830 | 6.633000 | 20.309999 | 29.0 | 5.122586 | 4.996712 | 0.197 | 2.20300 | 3.1860 | 6.6590 | 20.263000 | 29.0 | 5.241103 | 4.894268 | 0.248 | 2.28600 | 3.3970 | 6.455000 | 20.337999 | 29.0 | 5.521621 | 5.044311 | 0.287 | 2.29500 | 3.4060 | 7.74100 | 20.386000 | 29.0 | 5.722138 | 5.205193 | 0.347 | 2.27900 | 3.5820 | 8.388000 | 20.440001 | ... | 29.0 | 7.701448 | 7.734406 | 0.406 | 2.32900 | 3.708 | 11.26200 | 29.875000 | 29.0 | 7.599138 | 7.687203 | 0.387 | 2.27500 | 3.6240 | 11.11300 | 29.513000 | 29.0 | 7.491828 | 7.620008 | 0.369 | 2.21100 | 3.5520 | 11.00800 | 29.125999 | 29.0 | 7.458897 | 7.446798 | 0.360 | 2.22400 | 3.5650 | 11.079000 | 27.768999 | 29.0 | 7.440862 | 7.336123 | 0.363 | 2.22700 | 3.5800 | 11.13500 | 27.171000 |
| Lower middle income | 48.0 | 7.770750 | 8.743691 | 0.313 | 1.90375 | 4.7465 | 10.250750 | 45.993999 | 48.0 | 7.832417 | 8.727702 | 0.343 | 2.40075 | 4.6000 | 10.4555 | 46.556000 | 48.0 | 8.115313 | 8.898775 | 0.301 | 2.59600 | 4.6410 | 10.475750 | 46.983002 | 48.0 | 8.255917 | 8.910337 | 0.338 | 2.82550 | 5.0400 | 10.61950 | 47.308998 | 48.0 | 8.423167 | 9.070520 | 0.334 | 2.97425 | 4.9965 | 10.513500 | 47.395000 | ... | 48.0 | 8.405146 | 8.068344 | 0.625 | 3.15525 | 5.815 | 10.61175 | 37.929001 | 48.0 | 8.481042 | 8.467212 | 0.605 | 3.29375 | 5.6260 | 10.39750 | 42.769001 | 48.0 | 8.283375 | 8.379838 | 0.585 | 3.00875 | 5.5540 | 10.28800 | 41.849998 | 48.0 | 8.333917 | 8.201198 | 0.569 | 3.48350 | 5.6590 | 10.179250 | 40.945000 | 48.0 | 8.284333 | 8.057929 | 0.547 | 3.49675 | 5.7435 | 10.03575 | 40.616001 |
| Upper middle income | 50.0 | 11.855680 | 10.078790 | 0.118 | 3.63400 | 8.7430 | 16.647749 | 39.773998 | 50.0 | 11.943880 | 10.088228 | 0.443 | 4.11700 | 7.9385 | 18.1920 | 40.058998 | 50.0 | 12.369380 | 9.840402 | 0.442 | 4.89500 | 9.7990 | 18.579999 | 40.325001 | 50.0 | 12.909100 | 9.476483 | 1.334 | 6.03175 | 10.4980 | 16.69925 | 40.691002 | 50.0 | 13.475140 | 9.391157 | 1.311 | 6.55975 | 11.2455 | 17.850501 | 40.675999 | ... | 50.0 | 12.158600 | 8.192098 | 0.714 | 5.51200 | 10.785 | 17.34325 | 29.976999 | 50.0 | 11.795040 | 8.160169 | 0.845 | 5.13225 | 10.0515 | 16.93575 | 31.020000 | 50.0 | 11.403020 | 7.903636 | 0.743 | 4.68400 | 10.2675 | 16.34250 | 30.851000 | 50.0 | 11.610500 | 7.967784 | 0.742 | 4.62200 | 10.3435 | 16.614751 | 30.403999 | 50.0 | 11.690940 | 7.991920 | 0.791 | 4.83225 | 10.2210 | 15.78975 | 30.809999 |
4 rows × 240 columns
# Swap axes of the income-group means: years become the row index and each
# income group becomes a column, so DataFrame.plot draws one line per group.
transpose_income_group = grouped_data_income.T
transpose_income_group
| IncomeGroup | High income | Low income | Lower middle income | Upper middle income |
|---|---|---|---|---|
| 1991 | 7.736467 | 5.191034 | 7.770750 | 11.85568 |
| 1992 | 8.250783 | 5.122586 | 7.832417 | 11.94388 |
| 1993 | 9.243850 | 5.241103 | 8.115313 | 12.36938 |
| 1994 | 9.437617 | 5.521621 | 8.255917 | 12.90910 |
| 1995 | 9.237233 | 5.722138 | 8.423167 | 13.47514 |
| 1996 | 9.212950 | 5.862035 | 8.522042 | 13.96598 |
| 1997 | 8.843950 | 5.939793 | 8.436583 | 14.10892 |
| 1998 | 8.693183 | 6.142310 | 8.552500 | 14.09790 |
| 1999 | 8.703783 | 6.236034 | 8.596146 | 14.20370 |
| 2000 | 8.479150 | 6.476931 | 8.522167 | 13.90796 |
| 2001 | 8.252700 | 6.796276 | 8.632187 | 13.77664 |
| 2002 | 8.419200 | 6.910207 | 8.800479 | 14.13082 |
| 2003 | 8.672367 | 6.998379 | 8.815729 | 13.99642 |
| 2004 | 8.563100 | 7.046828 | 8.629063 | 13.73402 |
| 2005 | 8.345683 | 7.092172 | 8.552938 | 13.41994 |
| 2006 | 7.703300 | 7.077345 | 8.074896 | 12.61334 |
| 2007 | 7.011633 | 7.154931 | 7.795771 | 11.91092 |
| 2008 | 6.799433 | 7.056069 | 7.745271 | 11.34898 |
| 2009 | 8.279067 | 7.421069 | 8.212021 | 12.00712 |
| 2010 | 8.769983 | 7.645621 | 8.366792 | 12.23902 |
| 2011 | 8.888267 | 7.609897 | 8.326208 | 12.15724 |
| 2012 | 9.132533 | 7.646276 | 8.061479 | 12.12472 |
| 2013 | 9.244683 | 7.719931 | 8.041271 | 12.23362 |
| 2014 | 8.827083 | 7.709862 | 7.941438 | 12.29474 |
| 2015 | 8.370100 | 7.734690 | 8.267479 | 12.24672 |
| 2016 | 7.900950 | 7.701448 | 8.405146 | 12.15860 |
| 2017 | 7.301283 | 7.599138 | 8.481042 | 11.79504 |
| 2018 | 6.753733 | 7.491828 | 8.283375 | 11.40302 |
| 2019 | 6.569350 | 7.458897 | 8.333917 | 11.61050 |
| 2020 | 6.594800 | 7.440862 | 8.284333 | 11.69094 |
import matplotlib.pyplot as plt
%matplotlib inline
transpose_income_group.plot(figsize=(20,10))
plt.title("Mean % Female Unemployment")
plt.xlabel("Year")
plt.ylabel("% UnEmployment")
Text(0, 0.5, '% UnEmployment')
# Mean % female unemployment per region for every year column.
# (A describe() result was previously assigned to this name and immediately
# overwritten; that dead computation is dropped.)
# numeric_only=True keeps the mean restricted to the year columns; pandas
# >= 2.0 raises TypeError on the text columns otherwise.
grouped_data_region = merged_data_clean.groupby(['Region']).mean(numeric_only=True)
# Years as rows, regions as columns -> one plotted line per region.
transposed_region = grouped_data_region.transpose()
transposed_region.plot(figsize=(20,10))
plt.title('Mean % Female Unemployment by Region')
plt.xlabel('Year')
plt.ylabel("% UnEmployment")
Text(0, 0.5, '% UnEmployment')
import scipy.stats as stats
merged_data_clean.head()
| Country Name | Country Code | 1991 | 1992 | 1993 | 1994 | 1995 | 1996 | 1997 | 1998 | 1999 | 2000 | 2001 | 2002 | 2003 | 2004 | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | Region | IncomeGroup | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | Afghanistan | AFG | 14.226 | 14.348000 | 14.391 | 14.515000 | 14.803000 | 14.505000 | 14.699000 | 14.710 | 14.794 | 14.733000 | 14.702000 | 15.036 | 14.859 | 14.877000 | 14.910000 | 14.431000 | 14.724 | 14.154 | 14.911 | 14.815 | 14.781 | 14.820 | 14.680 | 14.505 | 14.427 | 14.314 | 14.090 | 13.906 | 14.004 | 14.062 | South Asia | Low income |
| 2 | Angola | AGO | 2.637 | 2.723000 | 2.695 | 2.882000 | 2.948000 | 2.994000 | 2.876000 | 2.967 | 2.932 | 2.798000 | 2.811000 | 2.899 | 2.833 | 2.882000 | 2.852000 | 2.746000 | 2.723 | 2.710 | 2.845 | 10.922 | 7.718 | 7.788 | 7.772 | 7.719 | 7.681 | 7.563 | 7.467 | 7.327 | 6.942 | 6.631 | Sub-Saharan Africa | Lower middle income |
| 3 | Albania | ALB | 15.804 | 16.247999 | 16.711 | 16.749001 | 16.766001 | 16.739000 | 16.434999 | 16.829 | 16.916 | 16.898001 | 16.992001 | 17.063 | 17.104 | 17.011999 | 16.919001 | 16.643999 | 16.399 | 13.752 | 15.734 | 15.881 | 13.762 | 11.467 | 13.345 | 15.153 | 17.098 | 14.573 | 12.563 | 11.229 | 11.604 | 12.190 | Europe & Central Asia | Upper middle income |
| 6 | United Arab Emirates | ARE | 2.431 | 2.115000 | 2.259 | 2.259000 | 2.359000 | 2.441000 | 2.501000 | 2.513 | 2.529 | 2.718000 | 3.355000 | 3.978 | 5.107 | 6.390000 | 7.221000 | 6.682000 | 5.829 | 5.419 | 5.843 | 5.883 | 5.983 | 5.956 | 5.851 | 5.214 | 4.703 | 4.200 | 7.136 | 6.187 | 6.046 | 6.042 | Middle East & North Africa | High income |
| 7 | Argentina | ARG | 5.747 | 6.711000 | 12.558 | 13.927000 | 22.195999 | 19.190001 | 17.631001 | 14.029 | 15.147 | 16.344999 | 17.191999 | 18.830 | 17.549 | 15.789000 | 13.561000 | 12.392000 | 10.544 | 9.720 | 9.855 | 9.196 | 8.496 | 8.811 | 8.484 | 8.383 | 8.851 | 9.118 | 9.464 | 10.538 | 10.922 | 11.487 | Latin America & Caribbean | Upper middle income |
# Distinct income-group labels, in order of first appearance in the data.
income_groups = merged_data_clean['IncomeGroup'].unique()
print(income_groups)
# Number of distinct income groups (shown as the cell result).
len(income_groups)
['Low income' 'Lower middle income' 'Upper middle income' 'High income']
4
from IPython.display import display

# Temporarily raise pandas' display limits (299 rows, 40 columns) so the whole
# cleaned table is rendered rather than a truncated preview. The limits are
# restored when the with-block exits.
with pd.option_context('display.max_rows', 299, 'display.max_columns', 40):
    display(merged_data_clean)  # explicit display() needed to show all data from inside the block
# How many participant countries are there? Collect the distinct country codes
# and print both the codes and their count.
CC_countries = merged_data_clean['Country Code'].unique()
print(CC_countries)
print(len(CC_countries))
['AFG' 'AGO' 'ALB' 'ARE' 'ARG' 'ARM' 'AUS' 'AUT' 'AZE' 'BDI' 'BEL' 'BEN' 'BFA' 'BGD' 'BGR' 'BHR' 'BHS' 'BIH' 'BLR' 'BLZ' 'BOL' 'BRA' 'BRB' 'BRN' 'BTN' 'BWA' 'CAF' 'CAN' 'CHE' 'CHI' 'CHL' 'CHN' 'CIV' 'CMR' 'COD' 'COG' 'COL' 'COM' 'CPV' 'CRI' 'CUB' 'CYP' 'CZE' 'DEU' 'DJI' 'DNK' 'DOM' 'DZA' 'ECU' 'EGY' 'ERI' 'ESP' 'EST' 'ETH' 'FIN' 'FJI' 'FRA' 'GAB' 'GBR' 'GEO' 'GHA' 'GIN' 'GMB' 'GNB' 'GNQ' 'GRC' 'GTM' 'GUM' 'GUY' 'HKG' 'HND' 'HRV' 'HTI' 'HUN' 'IDN' 'IND' 'IRL' 'IRN' 'IRQ' 'ISL' 'ISR' 'ITA' 'JAM' 'JOR' 'JPN' 'KAZ' 'KEN' 'KGZ' 'KHM' 'KOR' 'KWT' 'LAO' 'LBN' 'LBR' 'LBY' 'LCA' 'LKA' 'LSO' 'LTU' 'LUX' 'LVA' 'MAC' 'MAR' 'MDA' 'MDG' 'MDV' 'MEX' 'MKD' 'MLI' 'MLT' 'MMR' 'MNE' 'MNG' 'MOZ' 'MRT' 'MUS' 'MWI' 'MYS' 'NAM' 'NCL' 'NER' 'NGA' 'NIC' 'NLD' 'NOR' 'NPL' 'NZL' 'OMN' 'PAK' 'PAN' 'PER' 'PHL' 'PNG' 'POL' 'PRI' 'PRK' 'PRT' 'PRY' 'PSE' 'PYF' 'QAT' 'ROU' 'RUS' 'RWA' 'SAU' 'SDN' 'SEN' 'SGP' 'SLB' 'SLE' 'SLV' 'SOM' 'SRB' 'SSD' 'STP' 'SUR' 'SVK' 'SVN' 'SWE' 'SWZ' 'SYR' 'TCD' 'TGO' 'THA' 'TJK' 'TKM' 'TLS' 'TON' 'TTO' 'TUN' 'TUR' 'TZA' 'UGA' 'UKR' 'URY' 'USA' 'UZB' 'VCT' 'VEN' 'VIR' 'VNM' 'VUT' 'WSM' 'YEM' 'ZAF' 'ZMB' 'ZWE'] 187
# Two-column view pairing each country code with its income group.
CC_IG = merged_data_clean[['Country Code','IncomeGroup']]
print(CC_IG)
print(len(CC_IG))
Country Code IncomeGroup 1 AFG Low income 2 AGO Lower middle income 3 ALB Upper middle income 6 ARE High income 7 ARG Upper middle income .. ... ... 257 WSM Upper middle income 259 YEM Low income 260 ZAF Upper middle income 261 ZMB Lower middle income 262 ZWE Lower middle income [187 rows x 2 columns] 187
# Describe the IncomeGroup column per country code (count / unique / top /
# freq); used here to check how many income-group rows each country has.
CC_IG_GP= CC_IG.groupby(['Country Code']).describe()
print(CC_IG_GP)
print(len(CC_IG_GP))
IncomeGroup
count unique top freq
Country Code
AFG 1 1 Low income 1
AGO 1 1 Lower middle income 1
ALB 1 1 Upper middle income 1
ARE 1 1 High income 1
ARG 1 1 Upper middle income 1
... ... ... ... ...
WSM 1 1 Upper middle income 1
YEM 1 1 Low income 1
ZAF 1 1 Upper middle income 1
ZMB 1 1 Lower middle income 1
ZWE 1 1 Lower middle income 1
[187 rows x 4 columns]
187
# Describe the Country Code column per income group; the `count` column is the
# number of countries in each group.
IG_IG_GP= CC_IG.groupby(['IncomeGroup']).describe()
print(IG_IG_GP)
print(len(IG_IG_GP))
Country Code
count unique top freq
IncomeGroup
High income 60 60 BEL 1
Low income 29 29 NER 1
Lower middle income 48 48 TUN 1
Upper middle income 50 50 TKM 1
4
# Country code, 2019 value and income group only: the single-year slice the
# ANOVA below works on.
CC_IG_2019 = merged_data_clean[['Country Code','2019','IncomeGroup']]
print(CC_IG_2019)
print(len(CC_IG_2019))
Country Code 2019 IncomeGroup 1 AFG 14.004000 Low income 2 AGO 6.942000 Lower middle income 3 ALB 11.604000 Upper middle income 6 ARE 6.046000 High income 7 ARG 10.922000 Upper middle income .. ... ... ... 257 WSM 9.837000 Upper middle income 259 YEM 24.879999 Low income 260 ZAF 30.334999 Upper middle income 261 ZMB 12.237000 Lower middle income 262 ZWE 5.458000 Lower middle income [187 rows x 3 columns] 187
import statistics
# Recompute the per-income-group yearly means.
# numeric_only=True restricts the aggregation to the year columns; pandas
# >= 2.0 raises TypeError on the text columns otherwise.
grouped_data_income = merged_data_clean.groupby(['IncomeGroup']).mean(numeric_only=True)
grouped_data_income
| 1991 | 1992 | 1993 | 1994 | 1995 | 1996 | 1997 | 1998 | 1999 | 2000 | 2001 | 2002 | 2003 | 2004 | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| IncomeGroup | ||||||||||||||||||||||||||||||
| High income | 7.736467 | 8.250783 | 9.243850 | 9.437617 | 9.237233 | 9.212950 | 8.843950 | 8.693183 | 8.703783 | 8.479150 | 8.252700 | 8.419200 | 8.672367 | 8.563100 | 8.345683 | 7.703300 | 7.011633 | 6.799433 | 8.279067 | 8.769983 | 8.888267 | 9.132533 | 9.244683 | 8.827083 | 8.370100 | 7.900950 | 7.301283 | 6.753733 | 6.569350 | 6.594800 |
| Low income | 5.191034 | 5.122586 | 5.241103 | 5.521621 | 5.722138 | 5.862035 | 5.939793 | 6.142310 | 6.236034 | 6.476931 | 6.796276 | 6.910207 | 6.998379 | 7.046828 | 7.092172 | 7.077345 | 7.154931 | 7.056069 | 7.421069 | 7.645621 | 7.609897 | 7.646276 | 7.719931 | 7.709862 | 7.734690 | 7.701448 | 7.599138 | 7.491828 | 7.458897 | 7.440862 |
| Lower middle income | 7.770750 | 7.832417 | 8.115313 | 8.255917 | 8.423167 | 8.522042 | 8.436583 | 8.552500 | 8.596146 | 8.522167 | 8.632187 | 8.800479 | 8.815729 | 8.629063 | 8.552938 | 8.074896 | 7.795771 | 7.745271 | 8.212021 | 8.366792 | 8.326208 | 8.061479 | 8.041271 | 7.941438 | 8.267479 | 8.405146 | 8.481042 | 8.283375 | 8.333917 | 8.284333 |
| Upper middle income | 11.855680 | 11.943880 | 12.369380 | 12.909100 | 13.475140 | 13.965980 | 14.108920 | 14.097900 | 14.203700 | 13.907960 | 13.776640 | 14.130820 | 13.996420 | 13.734020 | 13.419940 | 12.613340 | 11.910920 | 11.348980 | 12.007120 | 12.239020 | 12.157240 | 12.124720 | 12.233620 | 12.294740 | 12.246720 | 12.158600 | 11.795040 | 11.403020 | 11.610500 | 11.690940 |
# Show the income-group labels: first the whole array, then one label per line.
# (Bare expressions that were neither printed nor the cell's last statement
# had no effect and are omitted.)
print(income_groups)
for income_group in income_groups:
    print(income_group)
['Low income' 'Lower middle income' 'Upper middle income' 'High income'] Low income Lower middle income Upper middle income High income
# Build one Series of 2019 values per income group, ordered like
# `income_groups`; these are the samples compared by the ANOVA.
income_group_data = [
    merged_data_clean['2019'][merged_data_clean['IncomeGroup'] == group_label]
    for group_label in income_groups
]
income_group_data
[1 14.004000 14 1.009000 17 9.223000 32 3.565000 41 3.404000 67 4.953000 70 2.756000 83 3.316000 84 12.237000 85 2.210000 98 16.693001 128 2.224000 148 1.846000 155 7.954000 162 3.453000 165 6.617000 170 0.360000 190 2.331000 200 1.070000 203 27.768999 207 3.628000 210 11.079000 213 13.406000 224 20.837000 226 1.706000 229 1.547000 231 9.935000 244 2.296000 259 24.879999 Name: 2019, dtype: float64, 2 6.942000 16 2.336000 18 6.204000 26 3.809000 30 3.200000 39 3.772000 40 3.872000 42 10.105000 44 4.702000 45 11.491000 54 10.402000 58 21.080000 65 22.150000 81 4.464000 95 7.003000 107 5.233000 118 2.800000 119 7.467000 120 0.824000 126 0.569000 135 6.687000 138 27.118000 145 10.415000 147 4.155000 157 2.074000 160 5.630000 163 12.112000 171 8.908000 172 6.696000 175 1.210000 181 5.596000 184 2.442000 186 1.378000 193 40.945000 204 7.446000 206 0.595000 208 3.578000 216 20.919001 221 23.660999 234 6.169000 240 23.410000 243 2.462000 245 7.796000 249 5.688000 254 1.904000 255 4.914000 261 12.237000 262 5.458000 Name: 2019, dtype: float64, 3 11.604000 7 10.922000 8 17.295000 13 6.335000 19 3.969000 22 21.002001 23 3.350000 24 9.653000 27 14.139000 31 21.222000 38 3.727000 43 12.713000 46 14.846000 48 1.776000 57 8.279000 64 4.958000 74 5.320000 78 28.507000 80 12.880000 86 6.184000 90 3.412000 92 15.189000 104 4.510000 109 18.566999 110 30.403999 114 10.807000 115 23.344999 117 5.222000 127 9.880000 129 24.570000 130 23.256001 149 5.866000 151 3.712000 154 17.049000 159 15.730000 166 3.740000 168 19.686001 183 3.409000 192 5.784000 199 4.462000 211 13.780000 217 11.923000 230 0.742000 232 2.179000 236 2.027000 241 16.422001 250 16.679001 251 9.320000 257 9.837000 260 30.334999 Name: 2019, dtype: float64, 6 6.046000 11 5.328000 12 4.547000 15 5.136000 20 2.963000 21 10.801000 28 10.692000 29 9.907000 33 5.263000 35 4.768000 36 7.695000 37 7.702000 51 8.122000 52 2.384000 53 2.694000 56 5.194000 68 16.204000 69 4.966000 73 6.296000 75 8.364000 79 3.575000 87 22.016001 91 
5.305000 94 3.198000 97 8.644000 99 3.480000 108 4.674000 111 2.835000 112 3.898000 113 10.838000 116 2.145000 123 3.898000 124 5.524000 140 6.305000 141 5.890000 142 5.631000 143 1.799000 156 3.774000 164 9.999000 169 14.080000 173 3.362000 174 3.244000 177 4.449000 179 11.910000 182 5.055000 187 3.509000 189 6.214000 191 6.946000 196 13.227000 197 0.432000 198 3.423000 202 22.114000 205 4.314000 218 6.182000 219 4.660000 220 6.220000 239 2.825000 247 10.658000 248 3.616000 253 9.221000 Name: 2019, dtype: float64]
# One-way ANOVA on the 2019 values across income groups.
# The samples are unpacked from the list so that every group in
# `income_group_data` takes part, however many there are, instead of exactly
# the first four.
statistic, pvalue = stats.f_oneway(*income_group_data)
print("statistic: %s pvalue %s" % (statistic, pvalue))
statistic: 5.052886221006664 pvalue 0.0021894356280740147
# Distinct region labels, in order of first appearance.
regions = merged_data_clean['Region'].unique()
print(regions)
# One Series of 2019 values per region, ordered like `regions`.
regions_data = [
    merged_data_clean['2019'][merged_data_clean['Region'] == region]
    for region in regions
]
# One-way ANOVA across all regions. Unpacking the list avoids hard-coding the
# number of regions, which would raise IndexError with fewer than seven and
# silently ignore any beyond the seventh.
statistic, pvalue = stats.f_oneway(*regions_data)
print("statistic: %s pvalue %s" % (statistic, pvalue))
['South Asia' 'Sub-Saharan Africa' 'Europe & Central Asia' 'Middle East & North Africa' 'Latin America & Caribbean' 'East Asia & Pacific' 'North America'] statistic: 7.031185985789595 pvalue 9.75436578876421e-07
# Box plot of the 2019 values, one box per income group; showmeans=True adds a
# marker for each group's mean alongside the median line.
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
ax.set_title("Boxplot of % Female Unemployment by Income Group")
# `income_group_data` and `income_groups` share the same order, so each box is
# labelled with its own group.
ax.boxplot(income_group_data, labels=income_groups, showmeans=True)
ax.set_xlabel("Country Income Group")
ax.set_ylabel("% Female Unemployment")
plt.show()
/usr/local/lib/python3.6/dist-packages/numpy/core/_asarray.py:83: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray return array(a, dtype, copy=False, order=order)
Analysis of variance (ANOVA) is a statistical technique used to check whether the means of two or more groups differ significantly from each other. ANOVA checks the impact of one or more factors by comparing the means of different samples; another measure for comparing samples is the t-test. (Source: Analytics Vidhya.)
In our case, we analyse the impact of the factor Income Group on the mean % Female Unemployment.
Hypothesis Testing - Analysis of Variance (ANOVA)
The null hypothesis in ANOVA is always that there is no difference in means.
H0: $\mu_1 = \mu_2 = \mu_3 = \mu_4$
The alternative hypothesis is always that the means are not all equal
H1: means are not all equal
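The test statistic for testing H0: μ1 = μ2 = ... = μk is:
$$F = \frac{MSB}{MSW} = \frac{SSB/(k-1)}{SSW/(N-k)}$$
where SSB is the sum of squares between groups, SSW is the sum of squares within groups, k is the number of groups and N is the total number of observations.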
And follows the table of calculations:
Assumptions while calculating the test statistic F:
Groups are the income groups: IG1, IG2, IG3, IG4
Sample Size for each group: n1 =60, n2=29, n3=48,n4=50
Sample mean: mu1, mu2, mu3, mu4
Sample standard deviation: s1,s2,s3,s4
# k = number of income groups, N = number of countries (rows).
k = len(merged_data_clean['IncomeGroup'].unique())
N = merged_data_clean.shape[0]
# Degrees of freedom between treatments (groups)
df1 = k - 1
# Degrees of freedom within treatments (groups)
df2 = N - k
# Total degrees of freedom: (k - 1) + (N - k) = N - 1
dfT = df1 + df2
print(k, N, df1, df2, dfT)
4 187 3 183 186
# Number of countries per income group, computed once.
# groupby sorts its keys, so positions 0..3 are High income, Low income,
# Lower middle income, Upper middle income (the same order as IG0..IG3).
# .iloc makes the positional lookup explicit; plain [0] on a string-indexed
# Series relies on a deprecated positional fallback.
group_sizes = merged_data_clean.groupby('IncomeGroup').size()
n0 = group_sizes.iloc[0]
n1 = group_sizes.iloc[1]
n2 = group_sizes.iloc[2]
n3 = group_sizes.iloc[3]
# Total number of observations across the four groups.
N = n0 + n1 + n2 + n3
print(n0, n1, n2, n3, N)
60 29 48 50 187
# Grand mean: the mean of the 2019 values over all N countries, ignoring the
# income-group split.
grandmu=(merged_data_clean['2019'].sum()/N)
print(grandmu)
8.50813905424613
We start by calculating the Sum of Squares Between (SSB). SSB is the variability due to differences between the group means and the grand mean; it is sometimes known as the Sum of Squares of the Model.
print(n0,n1,n2,n3)
# Keep only the 2019 value and the income-group label.
merged_data_clean_IG_2019 = merged_data_clean[['2019', 'IncomeGroup']]
# Split the rows into the four income groups (same order as n0..n3).
income_labels = merged_data_clean_IG_2019['IncomeGroup']
IG0 = merged_data_clean_IG_2019[income_labels == 'High income']
IG1 = merged_data_clean_IG_2019[income_labels == 'Low income']
IG2 = merged_data_clean_IG_2019[income_labels == 'Lower middle income']
IG3 = merged_data_clean_IG_2019[income_labels == 'Upper middle income']
# Mean of the 2019 values within each group.
IG0mu_2 = sum(IG0['2019']) / n0
IG1mu_2 = sum(IG1['2019']) / n1
IG2mu_2 = sum(IG2['2019']) / n2
IG3mu_2 = sum(IG3['2019']) / n3
# SSB = sum over groups of n_i * (group mean - grand mean)^2
SSB = (
    n0 * (IG0mu_2 - grandmu) ** 2
    + n1 * (IG1mu_2 - grandmu) ** 2
    + n2 * (IG2mu_2 - grandmu) ** 2
    + n3 * (IG3mu_2 - grandmu) ** 2
)
SSB
60 29 48 50
740.1497027860728
# Mean square between groups: SSB divided by the between-groups degrees of
# freedom (k - 1).
MSSB= SSB/df1
MSSB
246.7165675953576
Next we calculate the Sum of Squares Within (SSW): the variability in the data due to differences within each group.
import statistics
# Population variance (divides by n) of the 2019 values in each income group.
IG0_ss=statistics.pvariance(IG0['2019'])
IG1_ss=statistics.pvariance(IG1['2019'])
IG2_ss=statistics.pvariance(IG2['2019'])
IG3_ss=statistics.pvariance(IG3['2019'])
# NOTE: these population variances are not used in SSW; per the original
# author's note they did not give an accurate result, so SSW is built from the
# sample variance (ddof=1) computed by variance() instead.
def variance(datav, ddof=1):
    """Return the variance of *datav* with *ddof* delta degrees of freedom.

    The squared deviations from the arithmetic mean are summed and divided
    by ``len(datav) - ddof``: ``ddof=1`` (the default) gives the sample
    variance, ``ddof=0`` the population variance.

    *datav* must support ``len()`` and repeated iteration. An empty input,
    or one whose length equals *ddof*, raises ``ZeroDivisionError``.
    """
    count = len(datav)
    center = sum(datav) / count
    squared_deviations = [(value - center) ** 2 for value in datav]
    return sum(squared_deviations) / (count - ddof)
# Sample variance (ddof=1) of the 2019 values in each income group.
IG0_sss=variance(IG0['2019'])
IG1_sss=variance(IG1['2019'])
IG2_sss=variance(IG2['2019'])
IG3_sss=variance(IG3['2019'])
# SSW = sum over groups of (n_i - 1) * s_i^2, i.e. each group's sum of squared
# deviations from its own mean.
SSW=(n0-1)*IG0_sss +(n1-1)*IG1_sss +(n2-1)*IG2_sss +(n3-1)*IG3_sss
SSW
8935.315361396688
# Mean square within groups: SSW divided by the within-groups degrees of
# freedom (N - k).
MSSW = SSW/df2
MSSW
48.826859898342555
# F statistic: ratio of between-group to within-group mean squares.
F=MSSB/MSSW
F
5.0528862210066565
from scipy import stats
# p-value = upper-tail probability of F under the F(dfn, dfd) distribution.
# The numerator df is the between-groups df (df1 = k - 1) and the denominator
# df is the within-groups df (df2 = N - k). Passing the total df (dfT) as the
# numerator, as was done before, gives a wildly too-small p (~8e-26) that
# disagrees with stats.f_oneway on the same data (~0.0022).
p = stats.f.sf(F, df1, df2)
p
8.001863810066076e-26
One rejects the null hypothesis, H0, if the computed F-statistic is greater than the critical F-statistic. The critical F-statistic is determined by the two degrees of freedom and the alpha value. In our case (one-tailed, alpha = 0.05, df1 = 3 between groups, df2 = 183 within groups) the critical F is approximately 2.65.
Reject H0 if calculated F-statistic > critical F-statistic: 5.05 > 2.65
We reject the null hypothesis H0 because p<= 0.05
The % of Female Unemployment rate was measured across different income groups.
The purpose of calculating ANOVA was to see if averages of the values of % of Female Unemployment across the different Income Groups were statistically different.
We can now report that the Income Group factor significantly alters the average % Female Unemployment rate for the year 2019.
!pip install researchpy
Collecting researchpy Downloading https://files.pythonhosted.org/packages/4b/a3/b16ea25a5416f7c35824b2fc3ee1cdc9f9d8ab6d9fc4119679504d43b8a8/researchpy-0.2.3-py3-none-any.whl Requirement already satisfied: statsmodels in /usr/local/lib/python3.6/dist-packages (from researchpy) (0.10.2) Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from researchpy) (1.19.4) Requirement already satisfied: pandas in /usr/local/lib/python3.6/dist-packages (from researchpy) (1.1.5) Requirement already satisfied: scipy in /usr/local/lib/python3.6/dist-packages (from researchpy) (1.4.1) Requirement already satisfied: patsy>=0.4.0 in /usr/local/lib/python3.6/dist-packages (from statsmodels->researchpy) (0.5.1) Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas->researchpy) (2018.9) Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.6/dist-packages (from pandas->researchpy) (2.8.1) Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from patsy>=0.4.0->statsmodels->researchpy) (1.15.0) Installing collected packages: researchpy Successfully installed researchpy-0.2.3
import researchpy as rp
# Overall summary of the 2019 values: N, mean, SD, SE and 95% confidence
# interval.
rp.summary_cont(merged_data_clean['2019'])
/usr/local/lib/python3.6/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead. import pandas.util.testing as tm
| Variable | N | Mean | SD | SE | 95% Conf. | Interval | |
|---|---|---|---|---|---|---|---|
| 0 | 2019 | 187.0 | 8.5081 | 7.2124 | 0.5274 | 7.4676 | 9.5486 |
# Same summary (N, mean, SD, SE, 95% CI) of the 2019 values, split by income group.
rp.summary_cont(merged_data_clean_IG_2019.groupby(merged_data_clean_IG_2019['IncomeGroup']))
| 2019 | ||||||
|---|---|---|---|---|---|---|
| N | Mean | SD | SE | 95% Conf. | Interval | |
| IncomeGroup | ||||||
| High income | 60 | 6.5694 | 4.3386 | 0.5601 | 5.4486 | 7.6901 |
| Low income | 29 | 7.4589 | 7.4468 | 1.3828 | 4.6263 | 10.2915 |
| Lower middle income | 48 | 8.3339 | 8.2012 | 1.1837 | 5.9525 | 10.7153 |
| Upper middle income | 50 | 11.6105 | 7.9678 | 1.1268 | 9.3461 | 13.8749 |
Sum of Squares Total will be needed to calculate eta-squared later. This is the total variability in the data:
# Total sum of squares: between-group plus within-group variability.
SStotal = SSB+SSW
SStotal
9675.465064182761
Install the library
#One-Way ANOVA
!pip install pingouin
import pingouin as pg
Collecting pingouin
Downloading https://files.pythonhosted.org/packages/e6/5f/4618f878765a8b7037b8831f19105c5c2764b26e5e9afa4a29c58fc11d26/pingouin-0.3.8.tar.gz (223kB)
|████████████████████████████████| 225kB 8.6MB/s
Requirement already satisfied: numpy>=1.15 in /usr/local/lib/python3.6/dist-packages (from pingouin) (1.19.4)
Requirement already satisfied: scipy>=1.3 in /usr/local/lib/python3.6/dist-packages (from pingouin) (1.4.1)
Requirement already satisfied: pandas>=0.24 in /usr/local/lib/python3.6/dist-packages (from pingouin) (1.1.5)
Requirement already satisfied: matplotlib>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from pingouin) (3.2.2)
Requirement already satisfied: seaborn>=0.9.0 in /usr/local/lib/python3.6/dist-packages (from pingouin) (0.11.0)
Requirement already satisfied: statsmodels>=0.10.0 in /usr/local/lib/python3.6/dist-packages (from pingouin) (0.10.2)
Requirement already satisfied: scikit-learn in /usr/local/lib/python3.6/dist-packages (from pingouin) (0.22.2.post1)
Collecting pandas_flavor>=0.1.2
Downloading https://files.pythonhosted.org/packages/9a/57/7fbcff4c0961ed190ac5fcb0bd8194152ee1ee6487edf64fdbae16e2bc4b/pandas_flavor-0.2.0-py2.py3-none-any.whl
Collecting outdated
Downloading https://files.pythonhosted.org/packages/86/70/2f166266438a30e94140f00c99c0eac1c45807981052a1d4c123660e1323/outdated-0.2.0.tar.gz
Requirement already satisfied: tabulate in /usr/local/lib/python3.6/dist-packages (from pingouin) (0.8.7)
Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.6/dist-packages (from pandas>=0.24->pingouin) (2.8.1)
Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas>=0.24->pingouin) (2018.9)
Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib>=3.0.2->pingouin) (2.4.7)
Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib>=3.0.2->pingouin) (1.3.1)
Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.6/dist-packages (from matplotlib>=3.0.2->pingouin) (0.10.0)
Requirement already satisfied: patsy>=0.4.0 in /usr/local/lib/python3.6/dist-packages (from statsmodels>=0.10.0->pingouin) (0.5.1)
Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.6/dist-packages (from scikit-learn->pingouin) (1.0.0)
Requirement already satisfied: xarray in /usr/local/lib/python3.6/dist-packages (from pandas_flavor>=0.1.2->pingouin) (0.15.1)
Collecting littleutils
Downloading https://files.pythonhosted.org/packages/4e/b1/bb4e06f010947d67349f863b6a2ad71577f85590180a935f60543f622652/littleutils-0.2.2.tar.gz
Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from outdated->pingouin) (2.23.0)
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.6/dist-packages (from python-dateutil>=2.7.3->pandas>=0.24->pingouin) (1.15.0)
Requirement already satisfied: setuptools>=41.2 in /usr/local/lib/python3.6/dist-packages (from xarray->pandas_flavor>=0.1.2->pingouin) (51.0.0)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->outdated->pingouin) (2020.12.5)
Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->outdated->pingouin) (3.0.4)
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->outdated->pingouin) (1.24.3)
Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->outdated->pingouin) (2.10)
Building wheels for collected packages: pingouin, outdated, littleutils
Building wheel for pingouin (setup.py) ... done
Created wheel for pingouin: filename=pingouin-0.3.8-cp36-none-any.whl size=221687 sha256=c5385ed72fa4431562a35e1d99254d3006bd79ada10cc1ae78372b68d9eb26d1
Stored in directory: /root/.cache/pip/wheels/d6/9e/53/f885f73f29cf7c8cac3d8f4b1532bbfef2f5eb543946ac9055
Building wheel for outdated (setup.py) ... done
Created wheel for outdated: filename=outdated-0.2.0-cp36-none-any.whl size=4961 sha256=ae6560e666dbad724e1dc3efdaacb1e5d07b82377b959b3e95497ffb169e871c
Stored in directory: /root/.cache/pip/wheels/fd/7c/ef/814f514d31197310872b5abf353feb8fef9d67ee658e1e7e39
Building wheel for littleutils (setup.py) ... done
Created wheel for littleutils: filename=littleutils-0.2.2-cp36-none-any.whl size=7051 sha256=db56e5f1a155f8fc0cbec4668f10f2a1a3443c9d2038212b240a0d64d9a1829a
Stored in directory: /root/.cache/pip/wheels/53/16/9f/ac67d15c40243754fd73f620e1b9b6dedc20492ecc19a2bae1
Successfully built pingouin outdated littleutils
Installing collected packages: pandas-flavor, littleutils, outdated, pingouin
Successfully installed littleutils-0.2.2 outdated-0.2.0 pandas-flavor-0.2.0 pingouin-0.3.8
# One-way ANOVA with pingouin: dependent variable = 2019 value, factor =
# IncomeGroup. detailed=True returns the full table (SS, DF, MS, F, p-unc, np2),
# which can be compared with the hand-computed SSB / SSW / F above.
aov= pg.anova(dv='2019', between='IncomeGroup',data= merged_data_clean,detailed=True)
aov
| Source | SS | DF | MS | F | p-unc | np2 | |
|---|---|---|---|---|---|---|---|
| 0 | IncomeGroup | 740.149703 | 3 | 246.716568 | 5.052886 | 0.002189 | 0.076498 |
| 1 | Within | 8935.315361 | 183 | 48.826860 | NaN | NaN | NaN |
Next, we analyse the impact of the factor Region on the mean % Female Unemployment.
# Same one-way ANOVA with Region as the factor. Note that this rebinds `aov`,
# replacing the income-group table.
aov= pg.anova(dv='2019', between='Region',data= merged_data_clean,detailed=True)
aov
| Source | SS | DF | MS | F | p-unc | np2 | |
|---|---|---|---|---|---|---|---|
| 0 | Region | 1837.100070 | 6 | 306.183345 | 7.031186 | 9.754366e-07 | 0.189872 |
| 1 | Within | 7838.364994 | 180 | 43.546472 | NaN | NaN | NaN |
Next, we unpivot main_data from wide to long format with the melt function, keeping 'Country Code' and 'Indicator Name' as identifier columns. The resulting 'Indicator Name' column holds all indicators and the 'Year' column holds all years; later on we select a single indicator (% Female Unemployment) and a single year (2019).
# Year columns to unpivot: '1991' .. '2020', as strings because that is how
# they appear in the CSV header.
year_columns = [str(year) for year in range(1991, 2021)]
# Wide -> long: one row per (country, indicator, year). var_name names the
# new year column directly, so no separate rename of 'variable' is needed.
main_data_m = main_data.melt(
    id_vars=['Country Code', 'Indicator Name'],
    value_vars=year_columns,
    var_name='Year',
)
# Drop rows with missing values.
main_data_m_clean = main_data_m.dropna()
main_data_m_clean.head()
| Country Code | Indicator Name | Year | value | |
|---|---|---|---|---|
| 0 | ABW | Population ages 15-64 (% of total population) | 1991 | 68.523104 |
| 1 | ABW | Population ages 0-14 (% of total population) | 1991 | 24.084677 |
| 78 | ABW | Secondary education, duration (years) | 1991 | 5.000000 |
| 82 | ABW | Educational attainment, at least completed pos... | 1991 | 7.186510 |
| 83 | ABW | Educational attainment, at least completed pos... | 1991 | 9.047520 |
import pandas as pd
import folium
import csv
import json
# `stage` is a second name for the cleaned long-format frame
# (plain assignment, so no copy is made).
stage = main_data_m_clean
stage
| Country Code | Indicator Name | Year | value | |
|---|---|---|---|---|
| 0 | ABW | Population ages 15-64 (% of total population) | 1991 | 68.523104 |
| 1 | ABW | Population ages 0-14 (% of total population) | 1991 | 24.084677 |
| 78 | ABW | Secondary education, duration (years) | 1991 | 5.000000 |
| 82 | ABW | Educational attainment, at least completed pos... | 1991 | 7.186510 |
| 83 | ABW | Educational attainment, at least completed pos... | 1991 | 9.047520 |
| ... | ... | ... | ... | ... |
| 1282884 | ZWE | Labor force, female (% of total labor force) | 2020 | 50.859818 |
| 1282956 | ZWE | Secondary education, duration (years) | 2020 | 6.000000 |
| 1282969 | ZWE | Lower secondary school starting age (years) | 2020 | 13.000000 |
| 1283012 | ZWE | Primary education, duration (years) | 2020 | 7.000000 |
| 1283019 | ZWE | Primary school starting age (years) | 2020 | 6.000000 |
546123 rows × 4 columns
# Keep only the rows for the year 2019 ...
main_data_m_clean_year = main_data_m_clean.loc[
    lambda frame: frame['Year'] == '2019'
]
# ... and, of those, only the female-unemployment indicator.
main_data_m_clean_year_ind = main_data_m_clean_year.loc[
    lambda frame: frame['Indicator Name']
    == 'Unemployment, female (% of female labor force) (modeled ILO estimate)'
]
main_data_m_clean_year_ind
| Country Code | Indicator Name | Year | value | |
|---|---|---|---|---|
| 1197670 | AFG | Unemployment, female (% of female labor force)... | 2019 | 14.004000 |
| 1197832 | AGO | Unemployment, female (% of female labor force)... | 2019 | 6.942000 |
| 1197994 | ALB | Unemployment, female (% of female labor force)... | 2019 | 11.604000 |
| 1198318 | ARB | Unemployment, female (% of female labor force)... | 2019 | 19.954200 |
| 1198480 | ARE | Unemployment, female (% of female labor force)... | 2019 | 6.046000 |
| ... | ... | ... | ... | ... |
| 1239304 | WSM | Unemployment, female (% of female labor force)... | 2019 | 9.837000 |
| 1239628 | YEM | Unemployment, female (% of female labor force)... | 2019 | 24.879999 |
| 1239790 | ZAF | Unemployment, female (% of female labor force)... | 2019 | 30.334999 |
| 1239952 | ZMB | Unemployment, female (% of female labor force)... | 2019 | 12.237000 |
| 1240114 | ZWE | Unemployment, female (% of female labor force)... | 2019 | 5.458000 |
233 rows × 4 columns
# Two-column frame (country code, indicator value) used for the maps.
data_to_plot = main_data_m_clean_year_ind.loc[:, ['Country Code', 'value']]
data_to_plot['Country Code'].unique()
array(['AFG', 'AGO', 'ALB', 'ARB', 'ARE', 'ARG', 'ARM', 'AUS', 'AUT',
'AZE', 'BDI', 'BEL', 'BEN', 'BFA', 'BGD', 'BGR', 'BHR', 'BHS',
'BIH', 'BLR', 'BLZ', 'BOL', 'BRA', 'BRB', 'BRN', 'BTN', 'BWA',
'CAF', 'CAN', 'CEB', 'CHE', 'CHI', 'CHL', 'CHN', 'CIV', 'CMR',
'COD', 'COG', 'COL', 'COM', 'CPV', 'CRI', 'CSS', 'CUB', 'CYP',
'CZE', 'DEU', 'DJI', 'DNK', 'DOM', 'DZA', 'EAP', 'EAR', 'EAS',
'ECA', 'ECS', 'ECU', 'EGY', 'EMU', 'ERI', 'ESP', 'EST', 'ETH',
'EUU', 'FCS', 'FIN', 'FJI', 'FRA', 'GAB', 'GBR', 'GEO', 'GHA',
'GIN', 'GMB', 'GNB', 'GNQ', 'GRC', 'GTM', 'GUM', 'GUY', 'HIC',
'HKG', 'HND', 'HPC', 'HRV', 'HTI', 'HUN', 'IBD', 'IBT', 'IDA',
'IDB', 'IDN', 'IDX', 'IND', 'IRL', 'IRN', 'IRQ', 'ISL', 'ISR',
'ITA', 'JAM', 'JOR', 'JPN', 'KAZ', 'KEN', 'KGZ', 'KHM', 'KOR',
'KWT', 'LAC', 'LAO', 'LBN', 'LBR', 'LBY', 'LCA', 'LCN', 'LDC',
'LIC', 'LKA', 'LMC', 'LMY', 'LSO', 'LTE', 'LTU', 'LUX', 'LVA',
'MAC', 'MAR', 'MDA', 'MDG', 'MDV', 'MEA', 'MEX', 'MIC', 'MKD',
'MLI', 'MLT', 'MMR', 'MNA', 'MNE', 'MNG', 'MOZ', 'MRT', 'MUS',
'MWI', 'MYS', 'NAC', 'NAM', 'NCL', 'NER', 'NGA', 'NIC', 'NLD',
'NOR', 'NPL', 'NZL', 'OED', 'OMN', 'OSS', 'PAK', 'PAN', 'PER',
'PHL', 'PNG', 'POL', 'PRE', 'PRI', 'PRK', 'PRT', 'PRY', 'PSE',
'PSS', 'PST', 'PYF', 'QAT', 'ROU', 'RUS', 'RWA', 'SAS', 'SAU',
'SDN', 'SEN', 'SGP', 'SLB', 'SLE', 'SLV', 'SOM', 'SRB', 'SSA',
'SSD', 'SSF', 'SST', 'STP', 'SUR', 'SVK', 'SVN', 'SWE', 'SWZ',
'SYR', 'TCD', 'TEA', 'TEC', 'TGO', 'THA', 'TJK', 'TKM', 'TLA',
'TLS', 'TMN', 'TON', 'TSA', 'TSS', 'TTO', 'TUN', 'TUR', 'TZA',
'UGA', 'UKR', 'UMC', 'URY', 'USA', 'UZB', 'VCT', 'VEN', 'VIR',
'VNM', 'VUT', 'WLD', 'WSM', 'YEM', 'ZAF', 'ZMB', 'ZWE'],
dtype=object)
data_to_plot
| Country Code | value | |
|---|---|---|
| 1197670 | AFG | 14.004000 |
| 1197832 | AGO | 6.942000 |
| 1197994 | ALB | 11.604000 |
| 1198318 | ARB | 19.954200 |
| 1198480 | ARE | 6.046000 |
| ... | ... | ... |
| 1239304 | WSM | 9.837000 |
| 1239628 | YEM | 24.879999 |
| 1239790 | ZAF | 30.334999 |
| 1239952 | ZMB | 12.237000 |
| 1240114 | ZWE | 5.458000 |
233 rows × 2 columns
# Indicator name taken from the first row of the filtered frame.
hist_indicator = main_data_m_clean_year_ind['Indicator Name'].iloc[0]
hist_indicator
'Unemployment, female (% of female labor force) (modeled ILO estimate)'
# Fetch the world-countries GeoJSON with wget (IPython shell escape).
!wget --quiet https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DV0101EN/labs/Data_Files/world_countries.json
# NOTE(review): this message is printed unconditionally; it does not check
# whether the download actually succeeded.
print('GeoJSON file downloaded!')
GeoJSON file downloaded!
# Choropleth of the indicator on the default base map.
# Uses the folium.Choropleth class instead of the deprecated
# Map.choropleth method (which emits a FutureWarning); the arguments
# are the same.
wc = r'world_countries.json'
world1 = folium.Map(location=[0, 0], zoom_start=2)
folium.Choropleth(
    geo_data=wc,
    data=data_to_plot,
    columns=['Country Code', 'value'],
    key_on='feature.id',  # GeoJSON feature id is matched to 'Country Code'
    fill_color='YlOrRd',
    fill_opacity=0.8,
    line_opacity=0.1,
    legend_name='%Female Unemployment',
).add_to(world1)
world1
/usr/local/lib/python3.6/dist-packages/folium/folium.py:426: FutureWarning: The choropleth method has been deprecated. Instead use the new Choropleth class, which has the same arguments. See the example notebook 'GeoJSON_and_choropleth' for how to do this. FutureWarning
The darker the color, the higher the % female unemployment.
# Same choropleth on the 'stamenwatercolor' base tiles.
# Uses the folium.Choropleth class instead of the deprecated
# Map.choropleth method (which emits a FutureWarning); the arguments
# are the same.
world = folium.Map(location=[0, 0], zoom_start=2, tiles='stamenwatercolor')
folium.Choropleth(
    geo_data=wc,
    data=data_to_plot,
    columns=['Country Code', 'value'],
    key_on='feature.id',  # GeoJSON feature id is matched to 'Country Code'
    fill_color='YlOrRd',
    fill_opacity=0.8,
    line_opacity=0.1,
    legend_name='%Female Unemployment',
).add_to(world)
world
/usr/local/lib/python3.6/dist-packages/folium/folium.py:426: FutureWarning: The choropleth method has been deprecated. Instead use the new Choropleth class, which has the same arguments. See the example notebook 'GeoJSON_and_choropleth' for how to do this. FutureWarning
A Tile control has been added on the top right of the map.
# Choropleth with several selectable base tile layers and a layer control.
# Uses the folium.Choropleth class instead of the deprecated
# Map.choropleth method (which emits a FutureWarning); the arguments
# are the same.
worlds = folium.Map(location=[0, 0], zoom_start=2, tiles='cartodbpositron')
folium.Choropleth(
    geo_data=wc,
    data=data_to_plot,
    columns=['Country Code', 'value'],
    key_on='feature.id',  # GeoJSON feature id is matched to 'Country Code'
    fill_color='YlOrRd',
    fill_opacity=0.8,
    line_opacity=0.1,
    legend_name='%Female Unemployment',
).add_to(worlds)
# Additional base maps, added before the LayerControl that lists them.
folium.TileLayer('Stamen Terrain').add_to(worlds)
folium.TileLayer('Stamen Toner').add_to(worlds)
folium.TileLayer('Stamen Water Color').add_to(worlds)
folium.TileLayer('cartodbpositron').add_to(worlds)
folium.TileLayer('cartodbdark_matter').add_to(worlds)
folium.LayerControl().add_to(worlds)
worlds
/usr/local/lib/python3.6/dist-packages/folium/folium.py:426: FutureWarning: The choropleth method has been deprecated. Instead use the new Choropleth class, which has the same arguments. See the example notebook 'GeoJSON_and_choropleth' for how to do this. FutureWarning
# Map created without default tiles; the base layer is added explicitly
# with control=False.
worldss = folium.Map(location=[0, 0], zoom_start=2, tiles=None)
folium.TileLayer(
    'CartoDB positron',
    name="Light Map",
    control=False,
).add_to(worldss)
<folium.raster_layers.TileLayer at 0x7f0e9ce8b0b8>
Change the boundaries of the color classes in the color bar, using quantiles of the data as thresholds
# Class boundaries for the colour scale: selected quantiles of the values
# (0 and 1 give the minimum and maximum, so the scale spans all the data).
myscale = data_to_plot['value'].quantile((0, 0.1, 0.75, 0.9, 0.98, 1)).tolist()
# Uses the folium.Choropleth class instead of the deprecated
# Map.choropleth method (which emits a FutureWarning); the arguments
# are passed through unchanged.
folium.Choropleth(
    geo_data=wc,
    name='Choropleth',
    data=data_to_plot,
    columns=['Country Code', 'value'],
    key_on="feature.id",  # GeoJSON feature id is matched to 'Country Code'
    fill_color='YlGnBu',
    threshold_scale=myscale,
    fill_opacity=1,
    line_opacity=0.2,
    legend_name='% Female Unemployment ',
    smooth_factor=0,
).add_to(worldss)
worldss
/usr/local/lib/python3.6/dist-packages/folium/folium.py:426: FutureWarning: The choropleth method has been deprecated. Instead use the new Choropleth class, which has the same arguments. See the example notebook 'GeoJSON_and_choropleth' for how to do this. FutureWarning
!pip install geopandas
import geopandas as gpd
import folium
import branca.colormap as cm
# File name of the GeoJSON with the country outlines.
wc=r'world_countries.json'
# NOTE(review): data_url is assigned twice in a row, so the second value
# replaces the first immediately.
data_url = r'countries.geojson'
data_url = 'https://datahub.io/core/geo-countries/datapackage.json'
Collecting geopandas
Downloading https://files.pythonhosted.org/packages/f7/a4/e66aafbefcbb717813bf3a355c8c4fc3ed04ea1dd7feb2920f2f4f868921/geopandas-0.8.1-py2.py3-none-any.whl (962kB)
|████████████████████████████████| 972kB 8.9MB/s
Requirement already satisfied: shapely in /usr/local/lib/python3.6/dist-packages (from geopandas) (1.7.1)
Collecting fiona
Downloading https://files.pythonhosted.org/packages/37/94/4910fd55246c1d963727b03885ead6ef1cd3748a465f7b0239ab25dfc9a3/Fiona-1.8.18-cp36-cp36m-manylinux1_x86_64.whl (14.8MB)
|████████████████████████████████| 14.8MB 317kB/s
Collecting pyproj>=2.2.0
Downloading https://files.pythonhosted.org/packages/e4/ab/280e80a67cfc109d15428c0ec56391fc03a65857b7727cf4e6e6f99a4204/pyproj-3.0.0.post1-cp36-cp36m-manylinux2010_x86_64.whl (6.4MB)
|████████████████████████████████| 6.5MB 19.7MB/s
Requirement already satisfied: pandas>=0.23.0 in /usr/local/lib/python3.6/dist-packages (from geopandas) (1.1.5)
Requirement already satisfied: six>=1.7 in /usr/local/lib/python3.6/dist-packages (from fiona->geopandas) (1.15.0)
Requirement already satisfied: click<8,>=4.0 in /usr/local/lib/python3.6/dist-packages (from fiona->geopandas) (7.1.2)
Collecting click-plugins>=1.0
Downloading https://files.pythonhosted.org/packages/e9/da/824b92d9942f4e472702488857914bdd50f73021efea15b4cad9aca8ecef/click_plugins-1.1.1-py2.py3-none-any.whl
Collecting cligj>=0.5
Downloading https://files.pythonhosted.org/packages/42/1e/947eadf10d6804bf276eb8a038bd5307996dceaaa41cfd21b7a15ec62f5d/cligj-0.7.1-py3-none-any.whl
Requirement already satisfied: certifi in /usr/local/lib/python3.6/dist-packages (from fiona->geopandas) (2020.12.5)
Collecting munch
Downloading https://files.pythonhosted.org/packages/cc/ab/85d8da5c9a45e072301beb37ad7f833cd344e04c817d97e0cc75681d248f/munch-2.5.0-py2.py3-none-any.whl
Requirement already satisfied: attrs>=17 in /usr/local/lib/python3.6/dist-packages (from fiona->geopandas) (20.3.0)
Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.6/dist-packages (from pandas>=0.23.0->geopandas) (2018.9)
Requirement already satisfied: numpy>=1.15.4 in /usr/local/lib/python3.6/dist-packages (from pandas>=0.23.0->geopandas) (1.19.4)
Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.6/dist-packages (from pandas>=0.23.0->geopandas) (2.8.1)
Installing collected packages: click-plugins, cligj, munch, fiona, pyproj, geopandas
Successfully installed click-plugins-1.1.1 cligj-0.7.1 fiona-1.8.18 geopandas-0.8.1 munch-2.5.0 pyproj-3.0.0.post1
# Read the country outlines with geopandas and list the columns it found.
fname =r'world_countries.json'
worldc = gpd.read_file(fname)
worldc.columns
Index(['id', 'name', 'geometry'], dtype='object')
worldc
| id | name | geometry | |
|---|---|---|---|
| 0 | AFG | Afghanistan | POLYGON ((61.21082 35.65007, 62.23065 35.27066... |
| 1 | AGO | Angola | MULTIPOLYGON (((16.32653 -5.87747, 16.57318 -6... |
| 2 | ALB | Albania | POLYGON ((20.59025 41.85540, 20.46317 41.51509... |
| 3 | ARE | United Arab Emirates | POLYGON ((51.57952 24.24550, 51.75744 24.29407... |
| 4 | ARG | Argentina | MULTIPOLYGON (((-65.50000 -55.20000, -66.45000... |
| ... | ... | ... | ... |
| 172 | PSE | West Bank | POLYGON ((35.54566 32.39399, 35.54525 31.78251... |
| 173 | YEM | Yemen | POLYGON ((53.10857 16.65105, 52.38521 16.38241... |
| 174 | ZAF | South Africa | POLYGON ((31.52100 -29.25739, 31.32556 -29.401... |
| 175 | ZMB | Zambia | POLYGON ((32.75937 -9.23060, 33.23139 -9.67672... |
| 176 | ZWE | Zimbabwe | POLYGON ((31.19141 -22.25151, 30.65986 -22.151... |
177 rows × 3 columns
# Select the id, name and geometry columns of the outlines frame.
worldc_data=worldc[['id', 'name','geometry']]
worldc_data.head()
| id | name | geometry | |
|---|---|---|---|
| 0 | AFG | Afghanistan | POLYGON ((61.21082 35.65007, 62.23065 35.27066... |
| 1 | AGO | Angola | MULTIPOLYGON (((16.32653 -5.87747, 16.57318 -6... |
| 2 | ALB | Albania | POLYGON ((20.59025 41.85540, 20.46317 41.51509... |
| 3 | ARE | United Arab Emirates | POLYGON ((51.57952 24.24550, 51.75744 24.29407... |
| 4 | ARG | Argentina | MULTIPOLYGON (((-65.50000 -55.20000, -66.45000... |
# Rename 'id' to 'Country Code' so the outlines share a key column with
# data_to_plot; rename() returns a new frame and leaves worldc_data as is.
worldc_datar = worldc_data.rename(columns={'id': 'Country Code'})
worldc_datar
| Country Code | name | geometry | |
|---|---|---|---|
| 0 | AFG | Afghanistan | POLYGON ((61.21082 35.65007, 62.23065 35.27066... |
| 1 | AGO | Angola | MULTIPOLYGON (((16.32653 -5.87747, 16.57318 -6... |
| 2 | ALB | Albania | POLYGON ((20.59025 41.85540, 20.46317 41.51509... |
| 3 | ARE | United Arab Emirates | POLYGON ((51.57952 24.24550, 51.75744 24.29407... |
| 4 | ARG | Argentina | MULTIPOLYGON (((-65.50000 -55.20000, -66.45000... |
| ... | ... | ... | ... |
| 172 | PSE | West Bank | POLYGON ((35.54566 32.39399, 35.54525 31.78251... |
| 173 | YEM | Yemen | POLYGON ((53.10857 16.65105, 52.38521 16.38241... |
| 174 | ZAF | South Africa | POLYGON ((31.52100 -29.25739, 31.32556 -29.401... |
| 175 | ZMB | Zambia | POLYGON ((32.75937 -9.23060, 33.23139 -9.67672... |
| 176 | ZWE | Zimbabwe | POLYGON ((31.19141 -22.25151, 30.65986 -22.151... |
177 rows × 3 columns
data_to_plot
| Country Code | value | |
|---|---|---|
| 1197670 | AFG | 14.004000 |
| 1197832 | AGO | 6.942000 |
| 1197994 | ALB | 11.604000 |
| 1198318 | ARB | 19.954200 |
| 1198480 | ARE | 6.046000 |
| ... | ... | ... |
| 1239304 | WSM | 9.837000 |
| 1239628 | YEM | 24.879999 |
| 1239790 | ZAF | 30.334999 |
| 1239952 | ZMB | 12.237000 |
| 1240114 | ZWE | 5.458000 |
233 rows × 2 columns
# Inner join of outlines and indicator values on the country code: only
# codes present in both frames are kept.
world_datad = pd.merge(
    left=worldc_datar,
    right=data_to_plot,
    how='inner',
    on='Country Code',
)
# Fix the column order used for display.
world_datadc = world_datad[['name', 'Country Code', 'geometry', 'value']]
world_datadc
| name | Country Code | geometry | value | |
|---|---|---|---|---|
| 0 | Afghanistan | AFG | POLYGON ((61.21082 35.65007, 62.23065 35.27066... | 14.004000 |
| 1 | Angola | AGO | MULTIPOLYGON (((16.32653 -5.87747, 16.57318 -6... | 6.942000 |
| 2 | Albania | ALB | POLYGON ((20.59025 41.85540, 20.46317 41.51509... | 11.604000 |
| 3 | United Arab Emirates | ARE | POLYGON ((51.57952 24.24550, 51.75744 24.29407... | 6.046000 |
| 4 | Argentina | ARG | MULTIPOLYGON (((-65.50000 -55.20000, -66.45000... | 10.922000 |
| ... | ... | ... | ... | ... |
| 162 | West Bank | PSE | POLYGON ((35.54566 32.39399, 35.54525 31.78251... | 40.945000 |
| 163 | Yemen | YEM | POLYGON ((53.10857 16.65105, 52.38521 16.38241... | 24.879999 |
| 164 | South Africa | ZAF | POLYGON ((31.52100 -29.25739, 31.32556 -29.401... | 30.334999 |
| 165 | Zambia | ZMB | POLYGON ((32.75937 -9.23060, 33.23139 -9.67672... | 12.237000 |
| 166 | Zimbabwe | ZWE | POLYGON ((31.19141 -22.25151, 30.65986 -22.151... | 5.458000 |
167 rows × 4 columns
# Name, country code, geometry and value columns of the merged frame;
# this is the frame passed to the tooltip layers further down.
world_datado=world_datad[['name','Country Code','geometry', 'value' ]]
world_datado.head()
| name | Country Code | geometry | value | |
|---|---|---|---|---|
| 0 | Afghanistan | AFG | POLYGON ((61.21082 35.65007, 62.23065 35.27066... | 14.004 |
| 1 | Angola | AGO | MULTIPOLYGON (((16.32653 -5.87747, 16.57318 -6... | 6.942 |
| 2 | Albania | ALB | POLYGON ((20.59025 41.85540, 20.46317 41.51509... | 11.604 |
| 3 | United Arab Emirates | ARE | POLYGON ((51.57952 24.24550, 51.75744 24.29407... | 6.046 |
| 4 | Argentina | ARG | MULTIPOLYGON (((-65.50000 -55.20000, -66.45000... | 10.922 |
# Number of distinct country codes in the merged frame.
world_datado['Country Code'].unique().shape
# NOTE(review): isna() returns a boolean frame of the same shape as its input,
# so this yields the frame's shape rather than a count of missing values.
world_datado.isna().shape
# Drop rows containing any missing value and count the distinct codes again.
world_datadoo=world_datado.dropna()
world_datadoo['Country Code'].unique().shape
(167,)
# Quantile-based class boundaries computed from the merged frame.
myscale = world_datado['value'].quantile((0, 0.1, 0.75, 0.9, 0.98, 1)).tolist()
# Fresh map without default tiles, with a scale bar; the base layer is
# added explicitly with control=False.
worldss = folium.Map(
    location=[5, 5],
    zoom_start=2,
    tiles=None,
    control_scale=True,
)
folium.TileLayer(
    'CartoDB positron',
    name="Light Map",
    control=False,
).add_to(worldss)
<folium.raster_layers.TileLayer at 0x7f0e95179e48>
# Outline layer: every country drawn with a black 1px border and no fill.
Boundaries = folium.GeoJson(
    worldc,
    style_function=lambda feature: {
        'color': 'black',
        'weight': 1,
        'opacity': 1,
        'fillOpacity': 0,
    },
).add_to(worldss)
# Step colour map whose class boundaries are quantiles of the values.
colormap = cm.linear.YlGnBu_03.to_step(
    data=world_datado['value'],
    method='quant',
    quantiles=[0, 0.1, 0.75, 0.9, 0.98, 1],
)
colormap
# Interactive layer: countries filled according to `colormap`, highlighted on
# hover, with a tooltip showing the country name and the indicator value.
#
# Changes from the earlier version of this cell:
#   * fillOpacity is a 0-1 fraction, so the out-of-range value 10 is now 1;
#   * keep_in_front is actually called (it was a bare attribute access,
#     which does nothing);
#   * the 'CartoDB positron' tile layer is no longer added a second time --
#     worldss already receives it when the map is created.
# NOTE(review): colormap gets a caption here but is not added to the map in
# this cell, so no legend is drawn -- confirm whether that is intended.
colormap.caption = "% Female Unemployment"


def style_function(feature):
    # Fill colour comes from the feature's 'value' property.
    return {
        'fillColor': colormap(feature['properties']['value']),
        'color': '#000000',
        'fillOpacity': 1,
        'weight': 2,
    }


def highlight_function(feature):
    # Solid black while the pointer hovers over the country.
    return {
        'fillColor': '#000000',
        'color': '#000000',
        'fillOpacity': 1,
        'weight': 1,
    }


NIL = folium.features.GeoJson(
    world_datado,
    style_function=style_function,
    control=False,
    highlight_function=highlight_function,
    tooltip=folium.features.GeoJsonTooltip(
        fields=['name', 'value'],
        aliases=['Country Name: ', '% Female Unemployment'],
        localize=True,
        style=("background-color: white; color: #333333; font-family: arial; font-size: 12px; padding: 10px;")
    )
)
worldss.add_child(NIL)
worldss.keep_in_front(NIL)
worldss
# Interactive tooltip layer plus selectable base maps and a layer control.
#
# Changes from the earlier version of this cell:
#   * the extra tile layers are added to `worldss` (the map that gets the
#     LayerControl and is displayed) instead of `worlds`;
#   * fillOpacity is a 0-1 fraction, so the out-of-range value 2 is now 1;
#   * keep_in_front is actually called (it was a bare attribute access,
#     which does nothing);
#   * the control=False 'CartoDB positron' layer is not re-added --
#     worldss already receives it when the map is created.
# NOTE(review): colormap gets a caption here but is not added to the map in
# this cell, so no legend is drawn -- confirm whether that is intended.
colormap.caption = "% Female Unemployment"


def style_function(feature):
    # Fill colour comes from the feature's 'value' property.
    return {
        'fillColor': colormap(feature['properties']['value']),
        'color': '#000000',
        'fillOpacity': 1,
        'weight': 1,
    }


def highlight_function(feature):
    # Solid black while the pointer hovers over the country.
    return {
        'fillColor': '#000000',
        'color': '#000000',
        'fillOpacity': 1,
        'weight': 1,
    }


NIL = folium.features.GeoJson(
    world_datado,
    style_function=style_function,
    control=False,
    highlight_function=highlight_function,
    tooltip=folium.features.GeoJsonTooltip(
        fields=['name', 'value'],
        aliases=['Country Name: ', '% Female Unemployment'],
        localize=True,
        style=("background-color: white; color: #333333; font-family: arial; font-size: 12px; padding: 10px;")
    )
)
worldss.add_child(NIL)
worldss.keep_in_front(NIL)
# Base maps must be on the map before the LayerControl that lists them.
folium.TileLayer('Stamen Terrain').add_to(worldss)
folium.TileLayer('Stamen Toner').add_to(worldss)
folium.TileLayer('Stamen Water Color').add_to(worldss)
folium.TileLayer('cartodbpositron').add_to(worldss)
folium.TileLayer('cartodbdark_matter').add_to(worldss)
folium.LayerControl().add_to(worldss)
worldss